1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BASELINE 3 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE1 4 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE2 5 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+xop < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP 6 7 ; https://bugs.llvm.org/show_bug.cgi?id=37104 8 9 ; All the advanced stuff (negative tests, commutativity) is handled in the 10 ; scalar version of the test only. 11 12 ; ============================================================================ ; 13 ; 8-bit vector width 14 ; ============================================================================ ; 15 ; out_v1i8: masked merge r = (x & mask) | (y & ~mask) on <1 x i8>, with ~mask written as xor with all-ones; one set of assertions is shared by every llc configuration above. 16 define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind { 17 ; CHECK-LABEL: out_v1i8: 18 ; CHECK: # %bb.0: 19 ; CHECK-NEXT: andl %edx, %edi 20 ; CHECK-NEXT: notb %dl 21 ; CHECK-NEXT: andb %sil, %dl 22 ; CHECK-NEXT: orb %dil, %dl 23 ; CHECK-NEXT: movl %edx, %eax 24 ; CHECK-NEXT: retq 25 %mx = and <1 x i8> %x, %mask 26 %notmask = xor <1 x i8> %mask, <i8 -1> 27 %my = and <1 x i8> %y, %notmask 28 %r = or <1 x i8> %mx, %my 29 ret <1 x i8> %r 30 } 31 32 ; ============================================================================ ; 33 ; 16-bit vector width 34 ; ============================================================================ ; 35 ; out_v2i8: the same masked merge on <2 x i8>; assertions are split per llc configuration (baseline, SSE1, SSE2, XOP). 36 define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind { 37 ; CHECK-BASELINE-LABEL: out_v2i8: 38 ; CHECK-BASELINE: # %bb.0: 39 ; CHECK-BASELINE-NEXT: andl %r8d, %edi 40 ; CHECK-BASELINE-NEXT: andl %r9d, %esi 41 ; CHECK-BASELINE-NEXT: notb %r8b 42 ; CHECK-BASELINE-NEXT: notb %r9b 43 ; CHECK-BASELINE-NEXT: andb %cl, %r9b 44 ; CHECK-BASELINE-NEXT: andb %dl, %r8b 45 ; 
CHECK-BASELINE-NEXT: orb %dil, %r8b 46 ; CHECK-BASELINE-NEXT: orb %sil, %r9b 47 ; CHECK-BASELINE-NEXT: movl %r8d, %eax 48 ; CHECK-BASELINE-NEXT: movl %r9d, %edx 49 ; CHECK-BASELINE-NEXT: retq 50 ; 51 ; CHECK-SSE1-LABEL: out_v2i8: 52 ; CHECK-SSE1: # %bb.0: 53 ; CHECK-SSE1-NEXT: andl %r8d, %edi 54 ; CHECK-SSE1-NEXT: andl %r9d, %esi 55 ; CHECK-SSE1-NEXT: notb %r8b 56 ; CHECK-SSE1-NEXT: notb %r9b 57 ; CHECK-SSE1-NEXT: andb %cl, %r9b 58 ; CHECK-SSE1-NEXT: andb %dl, %r8b 59 ; CHECK-SSE1-NEXT: orb %dil, %r8b 60 ; CHECK-SSE1-NEXT: orb %sil, %r9b 61 ; CHECK-SSE1-NEXT: movl %r8d, %eax 62 ; CHECK-SSE1-NEXT: movl %r9d, %edx 63 ; CHECK-SSE1-NEXT: retq 64 ; 65 ; CHECK-SSE2-LABEL: out_v2i8: 66 ; CHECK-SSE2: # %bb.0: 67 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 68 ; CHECK-SSE2-NEXT: xorps {{.*}}(%rip), %xmm2 69 ; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 70 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 71 ; CHECK-SSE2-NEXT: retq 72 ; 73 ; CHECK-XOP-LABEL: out_v2i8: 74 ; CHECK-XOP: # %bb.0: 75 ; CHECK-XOP-NEXT: vandps %xmm2, %xmm0, %xmm0 76 ; CHECK-XOP-NEXT: vxorps {{.*}}(%rip), %xmm2, %xmm2 77 ; CHECK-XOP-NEXT: vandps %xmm2, %xmm1, %xmm1 78 ; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0 79 ; CHECK-XOP-NEXT: retq 80 %mx = and <2 x i8> %x, %mask 81 %notmask = xor <2 x i8> %mask, <i8 -1, i8 -1> 82 %my = and <2 x i8> %y, %notmask 83 %r = or <2 x i8> %mx, %my 84 ret <2 x i8> %r 85 } 86 ; out_v1i16: the same masked merge on <1 x i16>; one set of assertions is shared by every llc configuration. 87 define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind { 88 ; CHECK-LABEL: out_v1i16: 89 ; CHECK: # %bb.0: 90 ; CHECK-NEXT: andl %edx, %edi 91 ; CHECK-NEXT: notl %edx 92 ; CHECK-NEXT: andl %esi, %edx 93 ; CHECK-NEXT: orl %edi, %edx 94 ; CHECK-NEXT: movl %edx, %eax 95 ; CHECK-NEXT: retq 96 %mx = and <1 x i16> %x, %mask 97 %notmask = xor <1 x i16> %mask, <i16 -1> 98 %my = and <1 x i16> %y, %notmask 99 %r = or <1 x i16> %mx, %my 100 ret <1 x i16> %r 101 } 102 103 ; ============================================================================ ; 104 ; 32-bit vector width 105 ; 
============================================================================ ; 106 ; out_v4i8: masked merge r = (x & mask) | (y & ~mask) on <4 x i8>, with ~mask written as xor with all-ones; assertions are split per llc configuration. 107 define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { 108 ; CHECK-BASELINE-LABEL: out_v4i8: 109 ; CHECK-BASELINE: # %bb.0: 110 ; CHECK-BASELINE-NEXT: pushq %rbx 111 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b 112 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b 113 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 114 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl 115 ; CHECK-BASELINE-NEXT: andb %bl, %r8b 116 ; CHECK-BASELINE-NEXT: andb %al, %cl 117 ; CHECK-BASELINE-NEXT: andb %r11b, %dl 118 ; CHECK-BASELINE-NEXT: andb %r10b, %sil 119 ; CHECK-BASELINE-NEXT: notb %r11b 120 ; CHECK-BASELINE-NEXT: notb %al 121 ; CHECK-BASELINE-NEXT: notb %bl 122 ; CHECK-BASELINE-NEXT: notb %r10b 123 ; CHECK-BASELINE-NEXT: andb %r9b, %r10b 124 ; CHECK-BASELINE-NEXT: orb %sil, %r10b 125 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl 126 ; CHECK-BASELINE-NEXT: orb %r8b, %bl 127 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al 128 ; CHECK-BASELINE-NEXT: orb %cl, %al 129 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b 130 ; CHECK-BASELINE-NEXT: orb %dl, %r11b 131 ; CHECK-BASELINE-NEXT: movb %bl, 3(%rdi) 132 ; CHECK-BASELINE-NEXT: movb %al, 2(%rdi) 133 ; CHECK-BASELINE-NEXT: movb %r11b, 1(%rdi) 134 ; CHECK-BASELINE-NEXT: movb %r10b, (%rdi) 135 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 136 ; CHECK-BASELINE-NEXT: popq %rbx 137 ; CHECK-BASELINE-NEXT: retq 138 ; 139 ; CHECK-SSE1-LABEL: out_v4i8: 140 ; CHECK-SSE1: # %bb.0: 141 ; CHECK-SSE1-NEXT: pushq %rbx 142 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b 143 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b 144 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 145 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl 146 ; CHECK-SSE1-NEXT: andb %bl, %r8b 147 ; CHECK-SSE1-NEXT: andb %al, %cl 148 ; CHECK-SSE1-NEXT: andb %r11b, %dl 149 ; CHECK-SSE1-NEXT: andb %r10b, %sil 150 ; CHECK-SSE1-NEXT: notb %r11b 151 ; CHECK-SSE1-NEXT: 
notb %al 152 ; CHECK-SSE1-NEXT: notb %bl 153 ; CHECK-SSE1-NEXT: notb %r10b 154 ; CHECK-SSE1-NEXT: andb %r9b, %r10b 155 ; CHECK-SSE1-NEXT: orb %sil, %r10b 156 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl 157 ; CHECK-SSE1-NEXT: orb %r8b, %bl 158 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al 159 ; CHECK-SSE1-NEXT: orb %cl, %al 160 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b 161 ; CHECK-SSE1-NEXT: orb %dl, %r11b 162 ; CHECK-SSE1-NEXT: movb %bl, 3(%rdi) 163 ; CHECK-SSE1-NEXT: movb %al, 2(%rdi) 164 ; CHECK-SSE1-NEXT: movb %r11b, 1(%rdi) 165 ; CHECK-SSE1-NEXT: movb %r10b, (%rdi) 166 ; CHECK-SSE1-NEXT: movq %rdi, %rax 167 ; CHECK-SSE1-NEXT: popq %rbx 168 ; CHECK-SSE1-NEXT: retq 169 ; 170 ; CHECK-SSE2-LABEL: out_v4i8: 171 ; CHECK-SSE2: # %bb.0: 172 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 173 ; CHECK-SSE2-NEXT: xorps {{.*}}(%rip), %xmm2 174 ; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 175 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 176 ; CHECK-SSE2-NEXT: retq 177 ; 178 ; CHECK-XOP-LABEL: out_v4i8: 179 ; CHECK-XOP: # %bb.0: 180 ; CHECK-XOP-NEXT: vandps %xmm2, %xmm0, %xmm0 181 ; CHECK-XOP-NEXT: vxorps {{.*}}(%rip), %xmm2, %xmm2 182 ; CHECK-XOP-NEXT: vandps %xmm2, %xmm1, %xmm1 183 ; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0 184 ; CHECK-XOP-NEXT: retq 185 %mx = and <4 x i8> %x, %mask 186 %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1> 187 %my = and <4 x i8> %y, %notmask 188 %r = or <4 x i8> %mx, %my 189 ret <4 x i8> %r 190 } 191 ; out_v4i8_undef: as out_v4i8, but lane 2 of the xor constant is undef instead of -1. 192 define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { 193 ; CHECK-BASELINE-LABEL: out_v4i8_undef: 194 ; CHECK-BASELINE: # %bb.0: 195 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b 196 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b 197 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 198 ; CHECK-BASELINE-NEXT: andb %al, %r8b 199 ; CHECK-BASELINE-NEXT: andb %r11b, %dl 200 ; CHECK-BASELINE-NEXT: andb %r10b, %sil 201 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 202 ; CHECK-BASELINE-NEXT: notb %r11b 203 ; 
CHECK-BASELINE-NEXT: notb %al 204 ; CHECK-BASELINE-NEXT: notb %r10b 205 ; CHECK-BASELINE-NEXT: andb %r9b, %r10b 206 ; CHECK-BASELINE-NEXT: orb %sil, %r10b 207 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al 208 ; CHECK-BASELINE-NEXT: orb %r8b, %al 209 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b 210 ; CHECK-BASELINE-NEXT: orb %dl, %r11b 211 ; CHECK-BASELINE-NEXT: movb %cl, 2(%rdi) 212 ; CHECK-BASELINE-NEXT: movb %al, 3(%rdi) 213 ; CHECK-BASELINE-NEXT: movb %r11b, 1(%rdi) 214 ; CHECK-BASELINE-NEXT: movb %r10b, (%rdi) 215 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 216 ; CHECK-BASELINE-NEXT: retq 217 ; 218 ; CHECK-SSE1-LABEL: out_v4i8_undef: 219 ; CHECK-SSE1: # %bb.0: 220 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b 221 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b 222 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 223 ; CHECK-SSE1-NEXT: andb %al, %r8b 224 ; CHECK-SSE1-NEXT: andb %r11b, %dl 225 ; CHECK-SSE1-NEXT: andb %r10b, %sil 226 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 227 ; CHECK-SSE1-NEXT: notb %r11b 228 ; CHECK-SSE1-NEXT: notb %al 229 ; CHECK-SSE1-NEXT: notb %r10b 230 ; CHECK-SSE1-NEXT: andb %r9b, %r10b 231 ; CHECK-SSE1-NEXT: orb %sil, %r10b 232 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al 233 ; CHECK-SSE1-NEXT: orb %r8b, %al 234 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b 235 ; CHECK-SSE1-NEXT: orb %dl, %r11b 236 ; CHECK-SSE1-NEXT: movb %cl, 2(%rdi) 237 ; CHECK-SSE1-NEXT: movb %al, 3(%rdi) 238 ; CHECK-SSE1-NEXT: movb %r11b, 1(%rdi) 239 ; CHECK-SSE1-NEXT: movb %r10b, (%rdi) 240 ; CHECK-SSE1-NEXT: movq %rdi, %rax 241 ; CHECK-SSE1-NEXT: retq 242 ; 243 ; CHECK-SSE2-LABEL: out_v4i8_undef: 244 ; CHECK-SSE2: # %bb.0: 245 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 246 ; CHECK-SSE2-NEXT: xorps {{.*}}(%rip), %xmm2 247 ; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 248 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 249 ; CHECK-SSE2-NEXT: retq 250 ; 251 ; CHECK-XOP-LABEL: out_v4i8_undef: 252 ; CHECK-XOP: # %bb.0: 253 ; CHECK-XOP-NEXT: vandps %xmm2, %xmm0, %xmm0 254 ; 
CHECK-XOP-NEXT: vxorps {{.*}}(%rip), %xmm2, %xmm2 255 ; CHECK-XOP-NEXT: vandps %xmm2, %xmm1, %xmm1 256 ; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0 257 ; CHECK-XOP-NEXT: retq 258 %mx = and <4 x i8> %x, %mask 259 %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 undef, i8 -1> 260 %my = and <4 x i8> %y, %notmask 261 %r = or <4 x i8> %mx, %my 262 ret <4 x i8> %r 263 } 264 ; out_v2i16: the same masked merge on <2 x i16>. 265 define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind { 266 ; CHECK-BASELINE-LABEL: out_v2i16: 267 ; CHECK-BASELINE: # %bb.0: 268 ; CHECK-BASELINE-NEXT: andl %r9d, %esi 269 ; CHECK-BASELINE-NEXT: andl %r8d, %edi 270 ; CHECK-BASELINE-NEXT: notl %r8d 271 ; CHECK-BASELINE-NEXT: notl %r9d 272 ; CHECK-BASELINE-NEXT: andl %ecx, %r9d 273 ; CHECK-BASELINE-NEXT: orl %esi, %r9d 274 ; CHECK-BASELINE-NEXT: andl %edx, %r8d 275 ; CHECK-BASELINE-NEXT: orl %edi, %r8d 276 ; CHECK-BASELINE-NEXT: movl %r8d, %eax 277 ; CHECK-BASELINE-NEXT: movl %r9d, %edx 278 ; CHECK-BASELINE-NEXT: retq 279 ; 280 ; CHECK-SSE1-LABEL: out_v2i16: 281 ; CHECK-SSE1: # %bb.0: 282 ; CHECK-SSE1-NEXT: andl %r9d, %esi 283 ; CHECK-SSE1-NEXT: andl %r8d, %edi 284 ; CHECK-SSE1-NEXT: notl %r8d 285 ; CHECK-SSE1-NEXT: notl %r9d 286 ; CHECK-SSE1-NEXT: andl %ecx, %r9d 287 ; CHECK-SSE1-NEXT: orl %esi, %r9d 288 ; CHECK-SSE1-NEXT: andl %edx, %r8d 289 ; CHECK-SSE1-NEXT: orl %edi, %r8d 290 ; CHECK-SSE1-NEXT: movl %r8d, %eax 291 ; CHECK-SSE1-NEXT: movl %r9d, %edx 292 ; CHECK-SSE1-NEXT: retq 293 ; 294 ; CHECK-SSE2-LABEL: out_v2i16: 295 ; CHECK-SSE2: # %bb.0: 296 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 297 ; CHECK-SSE2-NEXT: xorps {{.*}}(%rip), %xmm2 298 ; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 299 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 300 ; CHECK-SSE2-NEXT: retq 301 ; 302 ; CHECK-XOP-LABEL: out_v2i16: 303 ; CHECK-XOP: # %bb.0: 304 ; CHECK-XOP-NEXT: vandps %xmm2, %xmm0, %xmm0 305 ; CHECK-XOP-NEXT: vxorps {{.*}}(%rip), %xmm2, %xmm2 306 ; CHECK-XOP-NEXT: vandps %xmm2, %xmm1, %xmm1 307 ; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0 308 ; 
CHECK-XOP-NEXT: retq 309 %mx = and <2 x i16> %x, %mask 310 %notmask = xor <2 x i16> %mask, <i16 -1, i16 -1> 311 %my = and <2 x i16> %y, %notmask 312 %r = or <2 x i16> %mx, %my 313 ret <2 x i16> %r 314 } 315 ; out_v1i32: the same masked merge on <1 x i32>; one set of assertions is shared by every llc configuration. 316 define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind { 317 ; CHECK-LABEL: out_v1i32: 318 ; CHECK: # %bb.0: 319 ; CHECK-NEXT: andl %edx, %edi 320 ; CHECK-NEXT: notl %edx 321 ; CHECK-NEXT: andl %esi, %edx 322 ; CHECK-NEXT: orl %edi, %edx 323 ; CHECK-NEXT: movl %edx, %eax 324 ; CHECK-NEXT: retq 325 %mx = and <1 x i32> %x, %mask 326 %notmask = xor <1 x i32> %mask, <i32 -1> 327 %my = and <1 x i32> %y, %notmask 328 %r = or <1 x i32> %mx, %my 329 ret <1 x i32> %r 330 } 331 332 ; ============================================================================ ; 333 ; 64-bit vector width 334 ; ============================================================================ ; 335 ; out_v8i8: the same masked merge on <8 x i8>. 336 define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { 337 ; CHECK-BASELINE-LABEL: out_v8i8: 338 ; CHECK-BASELINE: # %bb.0: 339 ; CHECK-BASELINE-NEXT: pushq %rbp 340 ; CHECK-BASELINE-NEXT: pushq %r15 341 ; CHECK-BASELINE-NEXT: pushq %r14 342 ; CHECK-BASELINE-NEXT: pushq %r13 343 ; CHECK-BASELINE-NEXT: pushq %r12 344 ; CHECK-BASELINE-NEXT: pushq %rbx 345 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl 346 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b 347 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b 348 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b 349 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b 350 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b 351 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl 352 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 353 ; CHECK-BASELINE-NEXT: andb %al, %r9b 354 ; CHECK-BASELINE-NEXT: andb %bl, %r8b 355 ; CHECK-BASELINE-NEXT: andb %r14b, %cl 356 ; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 357 ; CHECK-BASELINE-NEXT: andb %r11b, %dl 358 ; 
CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 359 ; CHECK-BASELINE-NEXT: andb %r10b, %sil 360 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b 361 ; CHECK-BASELINE-NEXT: andb %r12b, %r13b 362 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl 363 ; CHECK-BASELINE-NEXT: andb %r15b, %cl 364 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl 365 ; CHECK-BASELINE-NEXT: andb %bpl, %dl 366 ; CHECK-BASELINE-NEXT: notb %r10b 367 ; CHECK-BASELINE-NEXT: notb %r11b 368 ; CHECK-BASELINE-NEXT: notb %r14b 369 ; CHECK-BASELINE-NEXT: notb %bl 370 ; CHECK-BASELINE-NEXT: notb %al 371 ; CHECK-BASELINE-NEXT: notb %bpl 372 ; CHECK-BASELINE-NEXT: notb %r15b 373 ; CHECK-BASELINE-NEXT: notb %r12b 374 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r12b 375 ; CHECK-BASELINE-NEXT: orb %r13b, %r12b 376 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r15b 377 ; CHECK-BASELINE-NEXT: orb %cl, %r15b 378 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl 379 ; CHECK-BASELINE-NEXT: orb %dl, %bpl 380 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al 381 ; CHECK-BASELINE-NEXT: orb %r9b, %al 382 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl 383 ; CHECK-BASELINE-NEXT: orb %r8b, %bl 384 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r14b 385 ; CHECK-BASELINE-NEXT: orb {{[-0-9]+}}(%r{{[sb]}}p), %r14b # 1-byte Folded Reload 386 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b 387 ; CHECK-BASELINE-NEXT: orb {{[-0-9]+}}(%r{{[sb]}}p), %r11b # 1-byte Folded Reload 388 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b 389 ; CHECK-BASELINE-NEXT: orb %sil, %r10b 390 ; CHECK-BASELINE-NEXT: movb %r12b, 7(%rdi) 391 ; CHECK-BASELINE-NEXT: movb %r15b, 6(%rdi) 392 ; CHECK-BASELINE-NEXT: movb %bpl, 5(%rdi) 393 ; CHECK-BASELINE-NEXT: movb %al, 4(%rdi) 394 ; CHECK-BASELINE-NEXT: movb %bl, 3(%rdi) 395 ; CHECK-BASELINE-NEXT: movb %r14b, 2(%rdi) 396 ; CHECK-BASELINE-NEXT: movb %r11b, 1(%rdi) 397 ; CHECK-BASELINE-NEXT: movb %r10b, (%rdi) 398 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 399 ; 
CHECK-BASELINE-NEXT: popq %rbx 400 ; CHECK-BASELINE-NEXT: popq %r12 401 ; CHECK-BASELINE-NEXT: popq %r13 402 ; CHECK-BASELINE-NEXT: popq %r14 403 ; CHECK-BASELINE-NEXT: popq %r15 404 ; CHECK-BASELINE-NEXT: popq %rbp 405 ; CHECK-BASELINE-NEXT: retq 406 ; 407 ; CHECK-SSE1-LABEL: out_v8i8: 408 ; CHECK-SSE1: # %bb.0: 409 ; CHECK-SSE1-NEXT: pushq %rbp 410 ; CHECK-SSE1-NEXT: pushq %r15 411 ; CHECK-SSE1-NEXT: pushq %r14 412 ; CHECK-SSE1-NEXT: pushq %r13 413 ; CHECK-SSE1-NEXT: pushq %r12 414 ; CHECK-SSE1-NEXT: pushq %rbx 415 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl 416 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b 417 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b 418 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b 419 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b 420 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b 421 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl 422 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 423 ; CHECK-SSE1-NEXT: andb %al, %r9b 424 ; CHECK-SSE1-NEXT: andb %bl, %r8b 425 ; CHECK-SSE1-NEXT: andb %r14b, %cl 426 ; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 427 ; CHECK-SSE1-NEXT: andb %r11b, %dl 428 ; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 429 ; CHECK-SSE1-NEXT: andb %r10b, %sil 430 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b 431 ; CHECK-SSE1-NEXT: andb %r12b, %r13b 432 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl 433 ; CHECK-SSE1-NEXT: andb %r15b, %cl 434 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl 435 ; CHECK-SSE1-NEXT: andb %bpl, %dl 436 ; CHECK-SSE1-NEXT: notb %r10b 437 ; CHECK-SSE1-NEXT: notb %r11b 438 ; CHECK-SSE1-NEXT: notb %r14b 439 ; CHECK-SSE1-NEXT: notb %bl 440 ; CHECK-SSE1-NEXT: notb %al 441 ; CHECK-SSE1-NEXT: notb %bpl 442 ; CHECK-SSE1-NEXT: notb %r15b 443 ; CHECK-SSE1-NEXT: notb %r12b 444 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r12b 445 ; CHECK-SSE1-NEXT: orb %r13b, %r12b 446 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r15b 447 ; CHECK-SSE1-NEXT: orb %cl, %r15b 448 ; 
CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl 449 ; CHECK-SSE1-NEXT: orb %dl, %bpl 450 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al 451 ; CHECK-SSE1-NEXT: orb %r9b, %al 452 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl 453 ; CHECK-SSE1-NEXT: orb %r8b, %bl 454 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r14b 455 ; CHECK-SSE1-NEXT: orb {{[-0-9]+}}(%r{{[sb]}}p), %r14b # 1-byte Folded Reload 456 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b 457 ; CHECK-SSE1-NEXT: orb {{[-0-9]+}}(%r{{[sb]}}p), %r11b # 1-byte Folded Reload 458 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b 459 ; CHECK-SSE1-NEXT: orb %sil, %r10b 460 ; CHECK-SSE1-NEXT: movb %r12b, 7(%rdi) 461 ; CHECK-SSE1-NEXT: movb %r15b, 6(%rdi) 462 ; CHECK-SSE1-NEXT: movb %bpl, 5(%rdi) 463 ; CHECK-SSE1-NEXT: movb %al, 4(%rdi) 464 ; CHECK-SSE1-NEXT: movb %bl, 3(%rdi) 465 ; CHECK-SSE1-NEXT: movb %r14b, 2(%rdi) 466 ; CHECK-SSE1-NEXT: movb %r11b, 1(%rdi) 467 ; CHECK-SSE1-NEXT: movb %r10b, (%rdi) 468 ; CHECK-SSE1-NEXT: movq %rdi, %rax 469 ; CHECK-SSE1-NEXT: popq %rbx 470 ; CHECK-SSE1-NEXT: popq %r12 471 ; CHECK-SSE1-NEXT: popq %r13 472 ; CHECK-SSE1-NEXT: popq %r14 473 ; CHECK-SSE1-NEXT: popq %r15 474 ; CHECK-SSE1-NEXT: popq %rbp 475 ; CHECK-SSE1-NEXT: retq 476 ; 477 ; CHECK-SSE2-LABEL: out_v8i8: 478 ; CHECK-SSE2: # %bb.0: 479 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 480 ; CHECK-SSE2-NEXT: xorps {{.*}}(%rip), %xmm2 481 ; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 482 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 483 ; CHECK-SSE2-NEXT: retq 484 ; 485 ; CHECK-XOP-LABEL: out_v8i8: 486 ; CHECK-XOP: # %bb.0: 487 ; CHECK-XOP-NEXT: vandps %xmm2, %xmm0, %xmm0 488 ; CHECK-XOP-NEXT: vxorps {{.*}}(%rip), %xmm2, %xmm2 489 ; CHECK-XOP-NEXT: vandps %xmm2, %xmm1, %xmm1 490 ; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0 491 ; CHECK-XOP-NEXT: retq 492 %mx = and <8 x i8> %x, %mask 493 %notmask = xor <8 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 494 %my = and <8 x i8> %y, %notmask 495 %r = or <8 x i8> %mx, %my 496 ret <8 x i8> %r 497 } 
498 499 define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind { 500 ; CHECK-BASELINE-LABEL: out_v4i16: 501 ; CHECK-BASELINE: # %bb.0: 502 ; CHECK-BASELINE-NEXT: pushq %rbx 503 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d 504 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d 505 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %eax 506 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx 507 ; CHECK-BASELINE-NEXT: andl %ebx, %esi 508 ; CHECK-BASELINE-NEXT: andl %eax, %r8d 509 ; CHECK-BASELINE-NEXT: andl %r11d, %ecx 510 ; CHECK-BASELINE-NEXT: andl %r10d, %edx 511 ; CHECK-BASELINE-NEXT: notl %r10d 512 ; CHECK-BASELINE-NEXT: notl %r11d 513 ; CHECK-BASELINE-NEXT: notl %eax 514 ; CHECK-BASELINE-NEXT: notl %ebx 515 ; CHECK-BASELINE-NEXT: andl %r9d, %ebx 516 ; CHECK-BASELINE-NEXT: orl %esi, %ebx 517 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %ax 518 ; CHECK-BASELINE-NEXT: orl %r8d, %eax 519 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r11w 520 ; CHECK-BASELINE-NEXT: orl %ecx, %r11d 521 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r10w 522 ; CHECK-BASELINE-NEXT: orl %edx, %r10d 523 ; CHECK-BASELINE-NEXT: movw %bx, (%rdi) 524 ; CHECK-BASELINE-NEXT: movw %ax, 6(%rdi) 525 ; CHECK-BASELINE-NEXT: movw %r11w, 4(%rdi) 526 ; CHECK-BASELINE-NEXT: movw %r10w, 2(%rdi) 527 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 528 ; CHECK-BASELINE-NEXT: popq %rbx 529 ; CHECK-BASELINE-NEXT: retq 530 ; 531 ; CHECK-SSE1-LABEL: out_v4i16: 532 ; CHECK-SSE1: # %bb.0: 533 ; CHECK-SSE1-NEXT: pushq %rbx 534 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d 535 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d 536 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %eax 537 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx 538 ; CHECK-SSE1-NEXT: andl %ebx, %esi 539 ; CHECK-SSE1-NEXT: andl %eax, %r8d 540 ; CHECK-SSE1-NEXT: andl %r11d, %ecx 541 ; CHECK-SSE1-NEXT: andl %r10d, %edx 542 ; CHECK-SSE1-NEXT: notl %r10d 543 ; CHECK-SSE1-NEXT: notl %r11d 544 ; CHECK-SSE1-NEXT: notl %eax 545 ; 
CHECK-SSE1-NEXT: notl %ebx 546 ; CHECK-SSE1-NEXT: andl %r9d, %ebx 547 ; CHECK-SSE1-NEXT: orl %esi, %ebx 548 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %ax 549 ; CHECK-SSE1-NEXT: orl %r8d, %eax 550 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r11w 551 ; CHECK-SSE1-NEXT: orl %ecx, %r11d 552 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r10w 553 ; CHECK-SSE1-NEXT: orl %edx, %r10d 554 ; CHECK-SSE1-NEXT: movw %bx, (%rdi) 555 ; CHECK-SSE1-NEXT: movw %ax, 6(%rdi) 556 ; CHECK-SSE1-NEXT: movw %r11w, 4(%rdi) 557 ; CHECK-SSE1-NEXT: movw %r10w, 2(%rdi) 558 ; CHECK-SSE1-NEXT: movq %rdi, %rax 559 ; CHECK-SSE1-NEXT: popq %rbx 560 ; CHECK-SSE1-NEXT: retq 561 ; 562 ; CHECK-SSE2-LABEL: out_v4i16: 563 ; CHECK-SSE2: # %bb.0: 564 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 565 ; CHECK-SSE2-NEXT: xorps {{.*}}(%rip), %xmm2 566 ; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 567 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 568 ; CHECK-SSE2-NEXT: retq 569 ; 570 ; CHECK-XOP-LABEL: out_v4i16: 571 ; CHECK-XOP: # %bb.0: 572 ; CHECK-XOP-NEXT: vandps %xmm2, %xmm0, %xmm0 573 ; CHECK-XOP-NEXT: vxorps {{.*}}(%rip), %xmm2, %xmm2 574 ; CHECK-XOP-NEXT: vandps %xmm2, %xmm1, %xmm1 575 ; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0 576 ; CHECK-XOP-NEXT: retq 577 %mx = and <4 x i16> %x, %mask 578 %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1> 579 %my = and <4 x i16> %y, %notmask 580 %r = or <4 x i16> %mx, %my 581 ret <4 x i16> %r 582 } 583 584 define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind { 585 ; CHECK-BASELINE-LABEL: out_v4i16_undef: 586 ; CHECK-BASELINE: # %bb.0: 587 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d 588 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d 589 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %eax 590 ; CHECK-BASELINE-NEXT: andl %eax, %esi 591 ; CHECK-BASELINE-NEXT: andl %r11d, %r8d 592 ; CHECK-BASELINE-NEXT: andl %r10d, %edx 593 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx 594 ; CHECK-BASELINE-NEXT: notl %r10d 595 ; CHECK-BASELINE-NEXT: notl 
%r11d 596 ; CHECK-BASELINE-NEXT: notl %eax 597 ; CHECK-BASELINE-NEXT: andl %r9d, %eax 598 ; CHECK-BASELINE-NEXT: orl %esi, %eax 599 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r11w 600 ; CHECK-BASELINE-NEXT: orl %r8d, %r11d 601 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r10w 602 ; CHECK-BASELINE-NEXT: orl %edx, %r10d 603 ; CHECK-BASELINE-NEXT: movw %cx, 4(%rdi) 604 ; CHECK-BASELINE-NEXT: movw %ax, (%rdi) 605 ; CHECK-BASELINE-NEXT: movw %r11w, 6(%rdi) 606 ; CHECK-BASELINE-NEXT: movw %r10w, 2(%rdi) 607 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 608 ; CHECK-BASELINE-NEXT: retq 609 ; 610 ; CHECK-SSE1-LABEL: out_v4i16_undef: 611 ; CHECK-SSE1: # %bb.0: 612 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d 613 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d 614 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %eax 615 ; CHECK-SSE1-NEXT: andl %eax, %esi 616 ; CHECK-SSE1-NEXT: andl %r11d, %r8d 617 ; CHECK-SSE1-NEXT: andl %r10d, %edx 618 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx 619 ; CHECK-SSE1-NEXT: notl %r10d 620 ; CHECK-SSE1-NEXT: notl %r11d 621 ; CHECK-SSE1-NEXT: notl %eax 622 ; CHECK-SSE1-NEXT: andl %r9d, %eax 623 ; CHECK-SSE1-NEXT: orl %esi, %eax 624 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r11w 625 ; CHECK-SSE1-NEXT: orl %r8d, %r11d 626 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r10w 627 ; CHECK-SSE1-NEXT: orl %edx, %r10d 628 ; CHECK-SSE1-NEXT: movw %cx, 4(%rdi) 629 ; CHECK-SSE1-NEXT: movw %ax, (%rdi) 630 ; CHECK-SSE1-NEXT: movw %r11w, 6(%rdi) 631 ; CHECK-SSE1-NEXT: movw %r10w, 2(%rdi) 632 ; CHECK-SSE1-NEXT: movq %rdi, %rax 633 ; CHECK-SSE1-NEXT: retq 634 ; 635 ; CHECK-SSE2-LABEL: out_v4i16_undef: 636 ; CHECK-SSE2: # %bb.0: 637 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 638 ; CHECK-SSE2-NEXT: xorps {{.*}}(%rip), %xmm2 639 ; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 640 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 641 ; CHECK-SSE2-NEXT: retq 642 ; 643 ; CHECK-XOP-LABEL: out_v4i16_undef: 644 ; CHECK-XOP: # %bb.0: 645 ; CHECK-XOP-NEXT: vandps %xmm2, %xmm0, %xmm0 646 ; CHECK-XOP-NEXT: 
vxorps {{.*}}(%rip), %xmm2, %xmm2 647 ; CHECK-XOP-NEXT: vandps %xmm2, %xmm1, %xmm1 648 ; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0 649 ; CHECK-XOP-NEXT: retq 650 %mx = and <4 x i16> %x, %mask 651 %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 undef, i16 -1> 652 %my = and <4 x i16> %y, %notmask 653 %r = or <4 x i16> %mx, %my 654 ret <4 x i16> %r 655 } 656 657 define <2 x i32> @out_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind { 658 ; CHECK-BASELINE-LABEL: out_v2i32: 659 ; CHECK-BASELINE: # %bb.0: 660 ; CHECK-BASELINE-NEXT: andl %r9d, %esi 661 ; CHECK-BASELINE-NEXT: andl %r8d, %edi 662 ; CHECK-BASELINE-NEXT: notl %r8d 663 ; CHECK-BASELINE-NEXT: notl %r9d 664 ; CHECK-BASELINE-NEXT: andl %ecx, %r9d 665 ; CHECK-BASELINE-NEXT: orl %esi, %r9d 666 ; CHECK-BASELINE-NEXT: andl %edx, %r8d 667 ; CHECK-BASELINE-NEXT: orl %edi, %r8d 668 ; CHECK-BASELINE-NEXT: movl %r8d, %eax 669 ; CHECK-BASELINE-NEXT: movl %r9d, %edx 670 ; CHECK-BASELINE-NEXT: retq 671 ; 672 ; CHECK-SSE1-LABEL: out_v2i32: 673 ; CHECK-SSE1: # %bb.0: 674 ; CHECK-SSE1-NEXT: andl %r9d, %esi 675 ; CHECK-SSE1-NEXT: andl %r8d, %edi 676 ; CHECK-SSE1-NEXT: notl %r8d 677 ; CHECK-SSE1-NEXT: notl %r9d 678 ; CHECK-SSE1-NEXT: andl %ecx, %r9d 679 ; CHECK-SSE1-NEXT: orl %esi, %r9d 680 ; CHECK-SSE1-NEXT: andl %edx, %r8d 681 ; CHECK-SSE1-NEXT: orl %edi, %r8d 682 ; CHECK-SSE1-NEXT: movl %r8d, %eax 683 ; CHECK-SSE1-NEXT: movl %r9d, %edx 684 ; CHECK-SSE1-NEXT: retq 685 ; 686 ; CHECK-SSE2-LABEL: out_v2i32: 687 ; CHECK-SSE2: # %bb.0: 688 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 689 ; CHECK-SSE2-NEXT: xorps {{.*}}(%rip), %xmm2 690 ; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 691 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 692 ; CHECK-SSE2-NEXT: retq 693 ; 694 ; CHECK-XOP-LABEL: out_v2i32: 695 ; CHECK-XOP: # %bb.0: 696 ; CHECK-XOP-NEXT: vandps %xmm2, %xmm0, %xmm0 697 ; CHECK-XOP-NEXT: vxorps {{.*}}(%rip), %xmm2, %xmm2 698 ; CHECK-XOP-NEXT: vandps %xmm2, %xmm1, %xmm1 699 ; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0 700 ; 
CHECK-XOP-NEXT: retq 701 %mx = and <2 x i32> %x, %mask 702 %notmask = xor <2 x i32> %mask, <i32 -1, i32 -1> 703 %my = and <2 x i32> %y, %notmask 704 %r = or <2 x i32> %mx, %my 705 ret <2 x i32> %r 706 } 707 708 define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind { 709 ; CHECK-LABEL: out_v1i64: 710 ; CHECK: # %bb.0: 711 ; CHECK-NEXT: andq %rdx, %rdi 712 ; CHECK-NEXT: notq %rdx 713 ; CHECK-NEXT: andq %rsi, %rdx 714 ; CHECK-NEXT: orq %rdi, %rdx 715 ; CHECK-NEXT: movq %rdx, %rax 716 ; CHECK-NEXT: retq 717 %mx = and <1 x i64> %x, %mask 718 %notmask = xor <1 x i64> %mask, <i64 -1> 719 %my = and <1 x i64> %y, %notmask 720 %r = or <1 x i64> %mx, %my 721 ret <1 x i64> %r 722 } 723 724 ; ============================================================================ ; 725 ; 128-bit vector width 726 ; ============================================================================ ; 727 728 define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind { 729 ; CHECK-BASELINE-LABEL: out_v16i8: 730 ; CHECK-BASELINE: # %bb.0: 731 ; CHECK-BASELINE-NEXT: pushq %rbp 732 ; CHECK-BASELINE-NEXT: pushq %r15 733 ; CHECK-BASELINE-NEXT: pushq %r14 734 ; CHECK-BASELINE-NEXT: pushq %r13 735 ; CHECK-BASELINE-NEXT: pushq %r12 736 ; CHECK-BASELINE-NEXT: pushq %rbx 737 ; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 738 ; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 739 ; CHECK-BASELINE-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 740 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b 741 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b 742 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl 743 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b 744 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b 745 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b 746 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b 747 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl 748 ; 
CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl 749 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl 750 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 751 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 752 ; CHECK-BASELINE-NEXT: andb %al, %sil 753 ; CHECK-BASELINE-NEXT: notb %al 754 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al 755 ; CHECK-BASELINE-NEXT: orb %sil, %al 756 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 757 ; CHECK-BASELINE-NEXT: andb %cl, %sil 758 ; CHECK-BASELINE-NEXT: notb %cl 759 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 760 ; CHECK-BASELINE-NEXT: orb %sil, %cl 761 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 762 ; CHECK-BASELINE-NEXT: andb %dl, %sil 763 ; CHECK-BASELINE-NEXT: notb %dl 764 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl 765 ; CHECK-BASELINE-NEXT: orb %sil, %dl 766 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 767 ; CHECK-BASELINE-NEXT: andb %bl, %sil 768 ; CHECK-BASELINE-NEXT: notb %bl 769 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl 770 ; CHECK-BASELINE-NEXT: orb %sil, %bl 771 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 772 ; CHECK-BASELINE-NEXT: andb %r13b, %sil 773 ; CHECK-BASELINE-NEXT: notb %r13b 774 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r13b 775 ; CHECK-BASELINE-NEXT: orb %sil, %r13b 776 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 777 ; CHECK-BASELINE-NEXT: andb %r12b, %sil 778 ; CHECK-BASELINE-NEXT: notb %r12b 779 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r12b 780 ; CHECK-BASELINE-NEXT: orb %sil, %r12b 781 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 782 ; CHECK-BASELINE-NEXT: andb %r15b, %sil 783 ; CHECK-BASELINE-NEXT: notb %r15b 784 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r15b 785 ; CHECK-BASELINE-NEXT: orb %sil, %r15b 786 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 787 ; CHECK-BASELINE-NEXT: andb %r14b, %sil 788 ; CHECK-BASELINE-NEXT: notb %r14b 789 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r14b 790 ; CHECK-BASELINE-NEXT: orb 
%sil, %r14b 791 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 792 ; CHECK-BASELINE-NEXT: andb %bpl, %sil 793 ; CHECK-BASELINE-NEXT: notb %bpl 794 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl 795 ; CHECK-BASELINE-NEXT: orb %sil, %bpl 796 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 797 ; CHECK-BASELINE-NEXT: andb %r11b, %sil 798 ; CHECK-BASELINE-NEXT: notb %r11b 799 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b 800 ; CHECK-BASELINE-NEXT: orb %sil, %r11b 801 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 802 ; CHECK-BASELINE-NEXT: andb %r10b, %sil 803 ; CHECK-BASELINE-NEXT: notb %r10b 804 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b 805 ; CHECK-BASELINE-NEXT: orb %sil, %r10b 806 ; CHECK-BASELINE-NEXT: movb %al, 15(%rdi) 807 ; CHECK-BASELINE-NEXT: movb %cl, 14(%rdi) 808 ; CHECK-BASELINE-NEXT: movb %dl, 13(%rdi) 809 ; CHECK-BASELINE-NEXT: movb %bl, 12(%rdi) 810 ; CHECK-BASELINE-NEXT: movb %r13b, 11(%rdi) 811 ; CHECK-BASELINE-NEXT: movb %r12b, 10(%rdi) 812 ; CHECK-BASELINE-NEXT: movb %r15b, 9(%rdi) 813 ; CHECK-BASELINE-NEXT: movb %r14b, 8(%rdi) 814 ; CHECK-BASELINE-NEXT: movb %bpl, 7(%rdi) 815 ; CHECK-BASELINE-NEXT: movb %r11b, 6(%rdi) 816 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 817 ; CHECK-BASELINE-NEXT: andb %al, %r9b 818 ; CHECK-BASELINE-NEXT: notb %al 819 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al 820 ; CHECK-BASELINE-NEXT: orb %r9b, %al 821 ; CHECK-BASELINE-NEXT: movb %r10b, 5(%rdi) 822 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl 823 ; CHECK-BASELINE-NEXT: andb %cl, %r8b 824 ; CHECK-BASELINE-NEXT: notb %cl 825 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 826 ; CHECK-BASELINE-NEXT: orb %r8b, %cl 827 ; CHECK-BASELINE-NEXT: movb %al, 4(%rdi) 828 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 829 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 830 ; CHECK-BASELINE-NEXT: andb %al, %dl 831 ; CHECK-BASELINE-NEXT: notb %al 832 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al 833 ; 
CHECK-BASELINE-NEXT: orb %dl, %al 834 ; CHECK-BASELINE-NEXT: movb %cl, 3(%rdi) 835 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl 836 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 837 ; CHECK-BASELINE-NEXT: andb %cl, %dl 838 ; CHECK-BASELINE-NEXT: notb %cl 839 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 840 ; CHECK-BASELINE-NEXT: orb %dl, %cl 841 ; CHECK-BASELINE-NEXT: movb %al, 2(%rdi) 842 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 843 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 844 ; CHECK-BASELINE-NEXT: andb %al, %dl 845 ; CHECK-BASELINE-NEXT: notb %al 846 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al 847 ; CHECK-BASELINE-NEXT: orb %dl, %al 848 ; CHECK-BASELINE-NEXT: movb %cl, 1(%rdi) 849 ; CHECK-BASELINE-NEXT: movb %al, (%rdi) 850 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 851 ; CHECK-BASELINE-NEXT: popq %rbx 852 ; CHECK-BASELINE-NEXT: popq %r12 853 ; CHECK-BASELINE-NEXT: popq %r13 854 ; CHECK-BASELINE-NEXT: popq %r14 855 ; CHECK-BASELINE-NEXT: popq %r15 856 ; CHECK-BASELINE-NEXT: popq %rbp 857 ; CHECK-BASELINE-NEXT: retq 858 ; 859 ; CHECK-SSE1-LABEL: out_v16i8: 860 ; CHECK-SSE1: # %bb.0: 861 ; CHECK-SSE1-NEXT: pushq %rbp 862 ; CHECK-SSE1-NEXT: pushq %r15 863 ; CHECK-SSE1-NEXT: pushq %r14 864 ; CHECK-SSE1-NEXT: pushq %r13 865 ; CHECK-SSE1-NEXT: pushq %r12 866 ; CHECK-SSE1-NEXT: pushq %rbx 867 ; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 868 ; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 869 ; CHECK-SSE1-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 870 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b 871 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b 872 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl 873 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b 874 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b 875 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b 876 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b 877 ; CHECK-SSE1-NEXT: 
movb {{[0-9]+}}(%rsp), %bl 878 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl 879 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl 880 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 881 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 882 ; CHECK-SSE1-NEXT: andb %al, %sil 883 ; CHECK-SSE1-NEXT: notb %al 884 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al 885 ; CHECK-SSE1-NEXT: orb %sil, %al 886 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 887 ; CHECK-SSE1-NEXT: andb %cl, %sil 888 ; CHECK-SSE1-NEXT: notb %cl 889 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 890 ; CHECK-SSE1-NEXT: orb %sil, %cl 891 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 892 ; CHECK-SSE1-NEXT: andb %dl, %sil 893 ; CHECK-SSE1-NEXT: notb %dl 894 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl 895 ; CHECK-SSE1-NEXT: orb %sil, %dl 896 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 897 ; CHECK-SSE1-NEXT: andb %bl, %sil 898 ; CHECK-SSE1-NEXT: notb %bl 899 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl 900 ; CHECK-SSE1-NEXT: orb %sil, %bl 901 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 902 ; CHECK-SSE1-NEXT: andb %r13b, %sil 903 ; CHECK-SSE1-NEXT: notb %r13b 904 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r13b 905 ; CHECK-SSE1-NEXT: orb %sil, %r13b 906 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 907 ; CHECK-SSE1-NEXT: andb %r12b, %sil 908 ; CHECK-SSE1-NEXT: notb %r12b 909 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r12b 910 ; CHECK-SSE1-NEXT: orb %sil, %r12b 911 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 912 ; CHECK-SSE1-NEXT: andb %r15b, %sil 913 ; CHECK-SSE1-NEXT: notb %r15b 914 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r15b 915 ; CHECK-SSE1-NEXT: orb %sil, %r15b 916 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 917 ; CHECK-SSE1-NEXT: andb %r14b, %sil 918 ; CHECK-SSE1-NEXT: notb %r14b 919 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r14b 920 ; CHECK-SSE1-NEXT: orb %sil, %r14b 921 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 922 ; CHECK-SSE1-NEXT: andb %bpl, %sil 923 ; CHECK-SSE1-NEXT: notb %bpl 924 
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl 925 ; CHECK-SSE1-NEXT: orb %sil, %bpl 926 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 927 ; CHECK-SSE1-NEXT: andb %r11b, %sil 928 ; CHECK-SSE1-NEXT: notb %r11b 929 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b 930 ; CHECK-SSE1-NEXT: orb %sil, %r11b 931 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 932 ; CHECK-SSE1-NEXT: andb %r10b, %sil 933 ; CHECK-SSE1-NEXT: notb %r10b 934 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b 935 ; CHECK-SSE1-NEXT: orb %sil, %r10b 936 ; CHECK-SSE1-NEXT: movb %al, 15(%rdi) 937 ; CHECK-SSE1-NEXT: movb %cl, 14(%rdi) 938 ; CHECK-SSE1-NEXT: movb %dl, 13(%rdi) 939 ; CHECK-SSE1-NEXT: movb %bl, 12(%rdi) 940 ; CHECK-SSE1-NEXT: movb %r13b, 11(%rdi) 941 ; CHECK-SSE1-NEXT: movb %r12b, 10(%rdi) 942 ; CHECK-SSE1-NEXT: movb %r15b, 9(%rdi) 943 ; CHECK-SSE1-NEXT: movb %r14b, 8(%rdi) 944 ; CHECK-SSE1-NEXT: movb %bpl, 7(%rdi) 945 ; CHECK-SSE1-NEXT: movb %r11b, 6(%rdi) 946 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 947 ; CHECK-SSE1-NEXT: andb %al, %r9b 948 ; CHECK-SSE1-NEXT: notb %al 949 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al 950 ; CHECK-SSE1-NEXT: orb %r9b, %al 951 ; CHECK-SSE1-NEXT: movb %r10b, 5(%rdi) 952 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl 953 ; CHECK-SSE1-NEXT: andb %cl, %r8b 954 ; CHECK-SSE1-NEXT: notb %cl 955 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 956 ; CHECK-SSE1-NEXT: orb %r8b, %cl 957 ; CHECK-SSE1-NEXT: movb %al, 4(%rdi) 958 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 959 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 960 ; CHECK-SSE1-NEXT: andb %al, %dl 961 ; CHECK-SSE1-NEXT: notb %al 962 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al 963 ; CHECK-SSE1-NEXT: orb %dl, %al 964 ; CHECK-SSE1-NEXT: movb %cl, 3(%rdi) 965 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl 966 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 967 ; CHECK-SSE1-NEXT: andb %cl, %dl 968 ; CHECK-SSE1-NEXT: notb %cl 969 ; CHECK-SSE1-NEXT: andb 
{{[0-9]+}}(%rsp), %cl 970 ; CHECK-SSE1-NEXT: orb %dl, %cl 971 ; CHECK-SSE1-NEXT: movb %al, 2(%rdi) 972 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 973 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 974 ; CHECK-SSE1-NEXT: andb %al, %dl 975 ; CHECK-SSE1-NEXT: notb %al 976 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al 977 ; CHECK-SSE1-NEXT: orb %dl, %al 978 ; CHECK-SSE1-NEXT: movb %cl, 1(%rdi) 979 ; CHECK-SSE1-NEXT: movb %al, (%rdi) 980 ; CHECK-SSE1-NEXT: movq %rdi, %rax 981 ; CHECK-SSE1-NEXT: popq %rbx 982 ; CHECK-SSE1-NEXT: popq %r12 983 ; CHECK-SSE1-NEXT: popq %r13 984 ; CHECK-SSE1-NEXT: popq %r14 985 ; CHECK-SSE1-NEXT: popq %r15 986 ; CHECK-SSE1-NEXT: popq %rbp 987 ; CHECK-SSE1-NEXT: retq 988 ; 989 ; CHECK-SSE2-LABEL: out_v16i8: 990 ; CHECK-SSE2: # %bb.0: 991 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 992 ; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 993 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 994 ; CHECK-SSE2-NEXT: retq 995 ; 996 ; CHECK-XOP-LABEL: out_v16i8: 997 ; CHECK-XOP: # %bb.0: 998 ; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 999 ; CHECK-XOP-NEXT: retq 1000 %mx = and <16 x i8> %x, %mask 1001 %notmask = xor <16 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1002 %my = and <16 x i8> %y, %notmask 1003 %r = or <16 x i8> %mx, %my 1004 ret <16 x i8> %r 1005 } 1006 ; Masked merge of two <8 x i16> vectors passed by value: %r = (%x & %mask) | (%y & ~%mask), where ~%mask is written as xor with all-ones. The FileCheck assertions inside the function are autogenerated by utils/update_llc_test_checks.py; regenerate them instead of editing by hand. 1007 define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind { 1008 ; CHECK-BASELINE-LABEL: out_v8i16: 1009 ; CHECK-BASELINE: # %bb.0: 1010 ; CHECK-BASELINE-NEXT: pushq %rbp 1011 ; CHECK-BASELINE-NEXT: pushq %r14 1012 ; CHECK-BASELINE-NEXT: pushq %rbx 1013 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d 1014 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d 1015 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r14d 1016 ; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx 1017 ; CHECK-BASELINE-NEXT: andw %r14w, %bx 1018 ; CHECK-BASELINE-NEXT: notl %r14d 1019 ;
CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r14w 1020 ; CHECK-BASELINE-NEXT: orl %ebx, %r14d 1021 ; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx 1022 ; CHECK-BASELINE-NEXT: andw %r11w, %bx 1023 ; CHECK-BASELINE-NEXT: notl %r11d 1024 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r11w 1025 ; CHECK-BASELINE-NEXT: orl %ebx, %r11d 1026 ; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx 1027 ; CHECK-BASELINE-NEXT: andw %r10w, %bx 1028 ; CHECK-BASELINE-NEXT: notl %r10d 1029 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r10w 1030 ; CHECK-BASELINE-NEXT: orl %ebx, %r10d 1031 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx 1032 ; CHECK-BASELINE-NEXT: andl %ebx, %r9d 1033 ; CHECK-BASELINE-NEXT: notl %ebx 1034 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %bx 1035 ; CHECK-BASELINE-NEXT: orl %r9d, %ebx 1036 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %eax 1037 ; CHECK-BASELINE-NEXT: andl %eax, %r8d 1038 ; CHECK-BASELINE-NEXT: notl %eax 1039 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %ax 1040 ; CHECK-BASELINE-NEXT: orl %r8d, %eax 1041 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebp 1042 ; CHECK-BASELINE-NEXT: andl %ebp, %ecx 1043 ; CHECK-BASELINE-NEXT: notl %ebp 1044 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %bp 1045 ; CHECK-BASELINE-NEXT: orl %ecx, %ebp 1046 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ecx 1047 ; CHECK-BASELINE-NEXT: andl %ecx, %edx 1048 ; CHECK-BASELINE-NEXT: notl %ecx 1049 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx 1050 ; CHECK-BASELINE-NEXT: orl %edx, %ecx 1051 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edx 1052 ; CHECK-BASELINE-NEXT: andl %edx, %esi 1053 ; CHECK-BASELINE-NEXT: notl %edx 1054 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %dx 1055 ; CHECK-BASELINE-NEXT: orl %esi, %edx 1056 ; CHECK-BASELINE-NEXT: movw %r14w, 14(%rdi) 1057 ; CHECK-BASELINE-NEXT: movw %r11w, 12(%rdi) 1058 ; CHECK-BASELINE-NEXT: movw %r10w, 10(%rdi) 1059 ; CHECK-BASELINE-NEXT: movw %bx, 8(%rdi) 1060 ; CHECK-BASELINE-NEXT: movw %ax, 6(%rdi)
1061 ; CHECK-BASELINE-NEXT: movw %bp, 4(%rdi) 1062 ; CHECK-BASELINE-NEXT: movw %cx, 2(%rdi) 1063 ; CHECK-BASELINE-NEXT: movw %dx, (%rdi) 1064 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 1065 ; CHECK-BASELINE-NEXT: popq %rbx 1066 ; CHECK-BASELINE-NEXT: popq %r14 1067 ; CHECK-BASELINE-NEXT: popq %rbp 1068 ; CHECK-BASELINE-NEXT: retq 1069 ; 1070 ; CHECK-SSE1-LABEL: out_v8i16: 1071 ; CHECK-SSE1: # %bb.0: 1072 ; CHECK-SSE1-NEXT: pushq %rbp 1073 ; CHECK-SSE1-NEXT: pushq %r14 1074 ; CHECK-SSE1-NEXT: pushq %rbx 1075 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d 1076 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d 1077 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r14d 1078 ; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx 1079 ; CHECK-SSE1-NEXT: andw %r14w, %bx 1080 ; CHECK-SSE1-NEXT: notl %r14d 1081 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r14w 1082 ; CHECK-SSE1-NEXT: orl %ebx, %r14d 1083 ; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx 1084 ; CHECK-SSE1-NEXT: andw %r11w, %bx 1085 ; CHECK-SSE1-NEXT: notl %r11d 1086 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r11w 1087 ; CHECK-SSE1-NEXT: orl %ebx, %r11d 1088 ; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx 1089 ; CHECK-SSE1-NEXT: andw %r10w, %bx 1090 ; CHECK-SSE1-NEXT: notl %r10d 1091 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r10w 1092 ; CHECK-SSE1-NEXT: orl %ebx, %r10d 1093 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx 1094 ; CHECK-SSE1-NEXT: andl %ebx, %r9d 1095 ; CHECK-SSE1-NEXT: notl %ebx 1096 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %bx 1097 ; CHECK-SSE1-NEXT: orl %r9d, %ebx 1098 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %eax 1099 ; CHECK-SSE1-NEXT: andl %eax, %r8d 1100 ; CHECK-SSE1-NEXT: notl %eax 1101 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %ax 1102 ; CHECK-SSE1-NEXT: orl %r8d, %eax 1103 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebp 1104 ; CHECK-SSE1-NEXT: andl %ebp, %ecx 1105 ; CHECK-SSE1-NEXT: notl %ebp 1106 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %bp 1107 ; CHECK-SSE1-NEXT: orl %ecx, %ebp 1108 ;
CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ecx 1109 ; CHECK-SSE1-NEXT: andl %ecx, %edx 1110 ; CHECK-SSE1-NEXT: notl %ecx 1111 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx 1112 ; CHECK-SSE1-NEXT: orl %edx, %ecx 1113 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edx 1114 ; CHECK-SSE1-NEXT: andl %edx, %esi 1115 ; CHECK-SSE1-NEXT: notl %edx 1116 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx 1117 ; CHECK-SSE1-NEXT: orl %esi, %edx 1118 ; CHECK-SSE1-NEXT: movw %r14w, 14(%rdi) 1119 ; CHECK-SSE1-NEXT: movw %r11w, 12(%rdi) 1120 ; CHECK-SSE1-NEXT: movw %r10w, 10(%rdi) 1121 ; CHECK-SSE1-NEXT: movw %bx, 8(%rdi) 1122 ; CHECK-SSE1-NEXT: movw %ax, 6(%rdi) 1123 ; CHECK-SSE1-NEXT: movw %bp, 4(%rdi) 1124 ; CHECK-SSE1-NEXT: movw %cx, 2(%rdi) 1125 ; CHECK-SSE1-NEXT: movw %dx, (%rdi) 1126 ; CHECK-SSE1-NEXT: movq %rdi, %rax 1127 ; CHECK-SSE1-NEXT: popq %rbx 1128 ; CHECK-SSE1-NEXT: popq %r14 1129 ; CHECK-SSE1-NEXT: popq %rbp 1130 ; CHECK-SSE1-NEXT: retq 1131 ; 1132 ; CHECK-SSE2-LABEL: out_v8i16: 1133 ; CHECK-SSE2: # %bb.0: 1134 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 1135 ; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 1136 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 1137 ; CHECK-SSE2-NEXT: retq 1138 ; 1139 ; CHECK-XOP-LABEL: out_v8i16: 1140 ; CHECK-XOP: # %bb.0: 1141 ; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 1142 ; CHECK-XOP-NEXT: retq 1143 %mx = and <8 x i16> %x, %mask 1144 %notmask = xor <8 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1145 %my = and <8 x i16> %y, %notmask 1146 %r = or <8 x i16> %mx, %my 1147 ret <8 x i16> %r 1148 } 1149 ; Masked merge of two <4 x i32> vectors: %r = (%x & %mask) | (%y & ~%mask). Unlike the by-value tests, %x, %y and %mask are loaded (align 16) from the pointer arguments %px, %py and %pmask. 1150 define <4 x i32> @out_v4i32(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) nounwind { 1151 ; CHECK-BASELINE-LABEL: out_v4i32: 1152 ; CHECK-BASELINE: # %bb.0: 1153 ; CHECK-BASELINE-NEXT: pushq %rbx 1154 ; CHECK-BASELINE-NEXT: movl (%rcx), %r8d 1155 ; CHECK-BASELINE-NEXT: movl 4(%rcx), %r9d 1156 ; CHECK-BASELINE-NEXT: movl 8(%rcx), %eax 1157 ; CHECK-BASELINE-NEXT: movl 12(%rcx), %ecx 1158 ; CHECK-BASELINE-NEXT: movl
12(%rsi), %r10d 1159 ; CHECK-BASELINE-NEXT: andl %ecx, %r10d 1160 ; CHECK-BASELINE-NEXT: movl 8(%rsi), %r11d 1161 ; CHECK-BASELINE-NEXT: andl %eax, %r11d 1162 ; CHECK-BASELINE-NEXT: movl 4(%rsi), %ebx 1163 ; CHECK-BASELINE-NEXT: andl %r9d, %ebx 1164 ; CHECK-BASELINE-NEXT: movl (%rsi), %esi 1165 ; CHECK-BASELINE-NEXT: andl %r8d, %esi 1166 ; CHECK-BASELINE-NEXT: notl %r8d 1167 ; CHECK-BASELINE-NEXT: notl %r9d 1168 ; CHECK-BASELINE-NEXT: notl %eax 1169 ; CHECK-BASELINE-NEXT: notl %ecx 1170 ; CHECK-BASELINE-NEXT: andl 12(%rdx), %ecx 1171 ; CHECK-BASELINE-NEXT: orl %r10d, %ecx 1172 ; CHECK-BASELINE-NEXT: andl 8(%rdx), %eax 1173 ; CHECK-BASELINE-NEXT: orl %r11d, %eax 1174 ; CHECK-BASELINE-NEXT: andl 4(%rdx), %r9d 1175 ; CHECK-BASELINE-NEXT: orl %ebx, %r9d 1176 ; CHECK-BASELINE-NEXT: andl (%rdx), %r8d 1177 ; CHECK-BASELINE-NEXT: orl %esi, %r8d 1178 ; CHECK-BASELINE-NEXT: movl %ecx, 12(%rdi) 1179 ; CHECK-BASELINE-NEXT: movl %eax, 8(%rdi) 1180 ; CHECK-BASELINE-NEXT: movl %r9d, 4(%rdi) 1181 ; CHECK-BASELINE-NEXT: movl %r8d, (%rdi) 1182 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 1183 ; CHECK-BASELINE-NEXT: popq %rbx 1184 ; CHECK-BASELINE-NEXT: retq 1185 ; 1186 ; CHECK-SSE1-LABEL: out_v4i32: 1187 ; CHECK-SSE1: # %bb.0: 1188 ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 1189 ; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1 1190 ; CHECK-SSE1-NEXT: andps %xmm0, %xmm1 1191 ; CHECK-SSE1-NEXT: andnps (%rdx), %xmm0 1192 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 1193 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) 1194 ; CHECK-SSE1-NEXT: movq %rdi, %rax 1195 ; CHECK-SSE1-NEXT: retq 1196 ; 1197 ; CHECK-SSE2-LABEL: out_v4i32: 1198 ; CHECK-SSE2: # %bb.0: 1199 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 1200 ; CHECK-SSE2-NEXT: movaps (%rdi), %xmm1 1201 ; CHECK-SSE2-NEXT: andps %xmm0, %xmm1 1202 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 1203 ; CHECK-SSE2-NEXT: orps %xmm1, %xmm0 1204 ; CHECK-SSE2-NEXT: retq 1205 ; 1206 ; CHECK-XOP-LABEL: out_v4i32: 1207 ; CHECK-XOP: # %bb.0: 1208 ; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0 1209 ;
CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1 1210 ; CHECK-XOP-NEXT: vpcmov %xmm1, (%rsi), %xmm0, %xmm0 1211 ; CHECK-XOP-NEXT: retq 1212 %x = load <4 x i32>, <4 x i32> *%px, align 16 1213 %y = load <4 x i32>, <4 x i32> *%py, align 16 1214 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16 1215 %mx = and <4 x i32> %x, %mask 1216 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> 1217 %my = and <4 x i32> %y, %notmask 1218 %r = or <4 x i32> %mx, %my 1219 ret <4 x i32> %r 1220 } 1221 ; Same as out_v4i32, except that the all-ones xor constant has an undef lane: <i32 -1, i32 -1, i32 undef, i32 -1>. The SSE1, SSE2 and XOP assertions match those of out_v4i32; in the baseline assertions element 2 (offset 8) is only loaded, and-ed and stored, with no notl/orl. 1222 define <4 x i32> @out_v4i32_undef(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) nounwind { 1223 ; CHECK-BASELINE-LABEL: out_v4i32_undef: 1224 ; CHECK-BASELINE: # %bb.0: 1225 ; CHECK-BASELINE-NEXT: movl 8(%rsi), %r8d 1226 ; CHECK-BASELINE-NEXT: movl (%rcx), %r9d 1227 ; CHECK-BASELINE-NEXT: movl 4(%rcx), %r10d 1228 ; CHECK-BASELINE-NEXT: movl 12(%rcx), %eax 1229 ; CHECK-BASELINE-NEXT: andl 8(%rcx), %r8d 1230 ; CHECK-BASELINE-NEXT: movl 12(%rsi), %ecx 1231 ; CHECK-BASELINE-NEXT: andl %eax, %ecx 1232 ; CHECK-BASELINE-NEXT: movl 4(%rsi), %r11d 1233 ; CHECK-BASELINE-NEXT: andl %r10d, %r11d 1234 ; CHECK-BASELINE-NEXT: movl (%rsi), %esi 1235 ; CHECK-BASELINE-NEXT: andl %r9d, %esi 1236 ; CHECK-BASELINE-NEXT: notl %r9d 1237 ; CHECK-BASELINE-NEXT: notl %r10d 1238 ; CHECK-BASELINE-NEXT: notl %eax 1239 ; CHECK-BASELINE-NEXT: andl 12(%rdx), %eax 1240 ; CHECK-BASELINE-NEXT: orl %ecx, %eax 1241 ; CHECK-BASELINE-NEXT: andl 4(%rdx), %r10d 1242 ; CHECK-BASELINE-NEXT: orl %r11d, %r10d 1243 ; CHECK-BASELINE-NEXT: andl (%rdx), %r9d 1244 ; CHECK-BASELINE-NEXT: orl %esi, %r9d 1245 ; CHECK-BASELINE-NEXT: movl %r8d, 8(%rdi) 1246 ; CHECK-BASELINE-NEXT: movl %eax, 12(%rdi) 1247 ; CHECK-BASELINE-NEXT: movl %r10d, 4(%rdi) 1248 ; CHECK-BASELINE-NEXT: movl %r9d, (%rdi) 1249 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 1250 ; CHECK-BASELINE-NEXT: retq 1251 ; 1252 ; CHECK-SSE1-LABEL: out_v4i32_undef: 1253 ; CHECK-SSE1: # %bb.0: 1254 ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 1255 ; CHECK-SSE1-NEXT: movaps (%rsi),
%xmm1 1256 ; CHECK-SSE1-NEXT: andps %xmm0, %xmm1 1257 ; CHECK-SSE1-NEXT: andnps (%rdx), %xmm0 1258 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 1259 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) 1260 ; CHECK-SSE1-NEXT: movq %rdi, %rax 1261 ; CHECK-SSE1-NEXT: retq 1262 ; 1263 ; CHECK-SSE2-LABEL: out_v4i32_undef: 1264 ; CHECK-SSE2: # %bb.0: 1265 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 1266 ; CHECK-SSE2-NEXT: movaps (%rdi), %xmm1 1267 ; CHECK-SSE2-NEXT: andps %xmm0, %xmm1 1268 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 1269 ; CHECK-SSE2-NEXT: orps %xmm1, %xmm0 1270 ; CHECK-SSE2-NEXT: retq 1271 ; 1272 ; CHECK-XOP-LABEL: out_v4i32_undef: 1273 ; CHECK-XOP: # %bb.0: 1274 ; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0 1275 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1 1276 ; CHECK-XOP-NEXT: vpcmov %xmm1, (%rsi), %xmm0, %xmm0 1277 ; CHECK-XOP-NEXT: retq 1278 %x = load <4 x i32>, <4 x i32> *%px, align 16 1279 %y = load <4 x i32>, <4 x i32> *%py, align 16 1280 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16 1281 %mx = and <4 x i32> %x, %mask 1282 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 undef, i32 -1> 1283 %my = and <4 x i32> %y, %notmask 1284 %r = or <4 x i32> %mx, %my 1285 ret <4 x i32> %r 1286 } 1287 ; Masked merge of two <2 x i64> vectors passed by value: %r = (%x & %mask) | (%y & ~%mask). 1288 define <2 x i64> @out_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind { 1289 ; CHECK-BASELINE-LABEL: out_v2i64: 1290 ; CHECK-BASELINE: # %bb.0: 1291 ; CHECK-BASELINE-NEXT: andq %r9, %rsi 1292 ; CHECK-BASELINE-NEXT: andq %r8, %rdi 1293 ; CHECK-BASELINE-NEXT: notq %r8 1294 ; CHECK-BASELINE-NEXT: notq %r9 1295 ; CHECK-BASELINE-NEXT: andq %rcx, %r9 1296 ; CHECK-BASELINE-NEXT: orq %rsi, %r9 1297 ; CHECK-BASELINE-NEXT: andq %rdx, %r8 1298 ; CHECK-BASELINE-NEXT: orq %rdi, %r8 1299 ; CHECK-BASELINE-NEXT: movq %r8, %rax 1300 ; CHECK-BASELINE-NEXT: movq %r9, %rdx 1301 ; CHECK-BASELINE-NEXT: retq 1302 ; 1303 ; CHECK-SSE1-LABEL: out_v2i64: 1304 ; CHECK-SSE1: # %bb.0: 1305 ; CHECK-SSE1-NEXT: andq %r9, %rsi 1306 ; CHECK-SSE1-NEXT: andq %r8, %rdi 1307 ; CHECK-SSE1-NEXT: notq %r8 1308 ;
CHECK-SSE1-NEXT: notq %r9 1309 ; CHECK-SSE1-NEXT: andq %rcx, %r9 1310 ; CHECK-SSE1-NEXT: orq %rsi, %r9 1311 ; CHECK-SSE1-NEXT: andq %rdx, %r8 1312 ; CHECK-SSE1-NEXT: orq %rdi, %r8 1313 ; CHECK-SSE1-NEXT: movq %r8, %rax 1314 ; CHECK-SSE1-NEXT: movq %r9, %rdx 1315 ; CHECK-SSE1-NEXT: retq 1316 ; 1317 ; CHECK-SSE2-LABEL: out_v2i64: 1318 ; CHECK-SSE2: # %bb.0: 1319 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 1320 ; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 1321 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 1322 ; CHECK-SSE2-NEXT: retq 1323 ; 1324 ; CHECK-XOP-LABEL: out_v2i64: 1325 ; CHECK-XOP: # %bb.0: 1326 ; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 1327 ; CHECK-XOP-NEXT: retq 1328 %mx = and <2 x i64> %x, %mask 1329 %notmask = xor <2 x i64> %mask, <i64 -1, i64 -1> 1330 %my = and <2 x i64> %y, %notmask 1331 %r = or <2 x i64> %mx, %my 1332 ret <2 x i64> %r 1333 } 1334 1335 ; ============================================================================ ; 1336 ; 256-bit vector width 1337 ; ============================================================================ ; 1338 1339 define <32 x i8> @out_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) nounwind { 1340 ; CHECK-BASELINE-LABEL: out_v32i8: 1341 ; CHECK-BASELINE: # %bb.0: 1342 ; CHECK-BASELINE-NEXT: pushq %rbp 1343 ; CHECK-BASELINE-NEXT: pushq %r15 1344 ; CHECK-BASELINE-NEXT: pushq %r14 1345 ; CHECK-BASELINE-NEXT: pushq %r13 1346 ; CHECK-BASELINE-NEXT: pushq %r12 1347 ; CHECK-BASELINE-NEXT: pushq %rbx 1348 ; CHECK-BASELINE-NEXT: movq %rcx, %r15 1349 ; CHECK-BASELINE-NEXT: movq %rsi, %r14 1350 ; CHECK-BASELINE-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1351 ; CHECK-BASELINE-NEXT: movb 15(%rcx), %al 1352 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1353 ; CHECK-BASELINE-NEXT: movb 16(%rcx), %al 1354 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1355 ; CHECK-BASELINE-NEXT: movb 17(%rcx), %al 1356 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) #
1-byte Spill 1357 ; CHECK-BASELINE-NEXT: movb 18(%rcx), %al 1358 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1359 ; CHECK-BASELINE-NEXT: movb 19(%rcx), %al 1360 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1361 ; CHECK-BASELINE-NEXT: movb 20(%rcx), %al 1362 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1363 ; CHECK-BASELINE-NEXT: movb 21(%rcx), %r12b 1364 ; CHECK-BASELINE-NEXT: movb 22(%rcx), %r10b 1365 ; CHECK-BASELINE-NEXT: movb 23(%rcx), %r11b 1366 ; CHECK-BASELINE-NEXT: movb 24(%rcx), %bpl 1367 ; CHECK-BASELINE-NEXT: movb 25(%rcx), %r13b 1368 ; CHECK-BASELINE-NEXT: movb 26(%rcx), %r9b 1369 ; CHECK-BASELINE-NEXT: movb 27(%rcx), %r8b 1370 ; CHECK-BASELINE-NEXT: movb 28(%rcx), %dil 1371 ; CHECK-BASELINE-NEXT: movb 29(%rcx), %sil 1372 ; CHECK-BASELINE-NEXT: movb 30(%rcx), %bl 1373 ; CHECK-BASELINE-NEXT: movb 31(%rcx), %al 1374 ; CHECK-BASELINE-NEXT: movb 31(%r14), %cl 1375 ; CHECK-BASELINE-NEXT: andb %al, %cl 1376 ; CHECK-BASELINE-NEXT: notb %al 1377 ; CHECK-BASELINE-NEXT: andb 31(%rdx), %al 1378 ; CHECK-BASELINE-NEXT: orb %cl, %al 1379 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1380 ; CHECK-BASELINE-NEXT: movb 30(%r14), %al 1381 ; CHECK-BASELINE-NEXT: andb %bl, %al 1382 ; CHECK-BASELINE-NEXT: notb %bl 1383 ; CHECK-BASELINE-NEXT: andb 30(%rdx), %bl 1384 ; CHECK-BASELINE-NEXT: orb %al, %bl 1385 ; CHECK-BASELINE-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1386 ; CHECK-BASELINE-NEXT: movb 29(%r14), %al 1387 ; CHECK-BASELINE-NEXT: andb %sil, %al 1388 ; CHECK-BASELINE-NEXT: notb %sil 1389 ; CHECK-BASELINE-NEXT: andb 29(%rdx), %sil 1390 ; CHECK-BASELINE-NEXT: orb %al, %sil 1391 ; CHECK-BASELINE-NEXT: movb %sil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1392 ; CHECK-BASELINE-NEXT: movb 28(%r14), %al 1393 ; CHECK-BASELINE-NEXT: andb %dil, %al 1394 ; CHECK-BASELINE-NEXT: notb %dil 1395 ; CHECK-BASELINE-NEXT: andb 28(%rdx), %dil 1396 ; 
CHECK-BASELINE-NEXT: orb %al, %dil 1397 ; CHECK-BASELINE-NEXT: movb %dil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1398 ; CHECK-BASELINE-NEXT: movb 27(%r14), %al 1399 ; CHECK-BASELINE-NEXT: andb %r8b, %al 1400 ; CHECK-BASELINE-NEXT: notb %r8b 1401 ; CHECK-BASELINE-NEXT: andb 27(%rdx), %r8b 1402 ; CHECK-BASELINE-NEXT: orb %al, %r8b 1403 ; CHECK-BASELINE-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1404 ; CHECK-BASELINE-NEXT: movb 26(%r14), %al 1405 ; CHECK-BASELINE-NEXT: andb %r9b, %al 1406 ; CHECK-BASELINE-NEXT: notb %r9b 1407 ; CHECK-BASELINE-NEXT: andb 26(%rdx), %r9b 1408 ; CHECK-BASELINE-NEXT: orb %al, %r9b 1409 ; CHECK-BASELINE-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1410 ; CHECK-BASELINE-NEXT: movb 25(%r14), %al 1411 ; CHECK-BASELINE-NEXT: andb %r13b, %al 1412 ; CHECK-BASELINE-NEXT: notb %r13b 1413 ; CHECK-BASELINE-NEXT: andb 25(%rdx), %r13b 1414 ; CHECK-BASELINE-NEXT: orb %al, %r13b 1415 ; CHECK-BASELINE-NEXT: movb %r13b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1416 ; CHECK-BASELINE-NEXT: movb 24(%r14), %al 1417 ; CHECK-BASELINE-NEXT: andb %bpl, %al 1418 ; CHECK-BASELINE-NEXT: notb %bpl 1419 ; CHECK-BASELINE-NEXT: andb 24(%rdx), %bpl 1420 ; CHECK-BASELINE-NEXT: orb %al, %bpl 1421 ; CHECK-BASELINE-NEXT: movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1422 ; CHECK-BASELINE-NEXT: movb 23(%r14), %al 1423 ; CHECK-BASELINE-NEXT: andb %r11b, %al 1424 ; CHECK-BASELINE-NEXT: notb %r11b 1425 ; CHECK-BASELINE-NEXT: andb 23(%rdx), %r11b 1426 ; CHECK-BASELINE-NEXT: orb %al, %r11b 1427 ; CHECK-BASELINE-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1428 ; CHECK-BASELINE-NEXT: movb 22(%r14), %al 1429 ; CHECK-BASELINE-NEXT: andb %r10b, %al 1430 ; CHECK-BASELINE-NEXT: notb %r10b 1431 ; CHECK-BASELINE-NEXT: andb 22(%rdx), %r10b 1432 ; CHECK-BASELINE-NEXT: orb %al, %r10b 1433 ; CHECK-BASELINE-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1434 ; CHECK-BASELINE-NEXT: movb 21(%r14), %al 1435 ; CHECK-BASELINE-NEXT: andb %r12b, 
%al 1436 ; CHECK-BASELINE-NEXT: notb %r12b 1437 ; CHECK-BASELINE-NEXT: andb 21(%rdx), %r12b 1438 ; CHECK-BASELINE-NEXT: orb %al, %r12b 1439 ; CHECK-BASELINE-NEXT: movb %r12b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1440 ; CHECK-BASELINE-NEXT: movb 20(%r14), %al 1441 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1442 ; CHECK-BASELINE-NEXT: andb %cl, %al 1443 ; CHECK-BASELINE-NEXT: notb %cl 1444 ; CHECK-BASELINE-NEXT: andb 20(%rdx), %cl 1445 ; CHECK-BASELINE-NEXT: orb %al, %cl 1446 ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1447 ; CHECK-BASELINE-NEXT: movb 19(%r14), %al 1448 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1449 ; CHECK-BASELINE-NEXT: andb %cl, %al 1450 ; CHECK-BASELINE-NEXT: notb %cl 1451 ; CHECK-BASELINE-NEXT: andb 19(%rdx), %cl 1452 ; CHECK-BASELINE-NEXT: orb %al, %cl 1453 ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1454 ; CHECK-BASELINE-NEXT: movb 18(%r14), %al 1455 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1456 ; CHECK-BASELINE-NEXT: andb %cl, %al 1457 ; CHECK-BASELINE-NEXT: notb %cl 1458 ; CHECK-BASELINE-NEXT: andb 18(%rdx), %cl 1459 ; CHECK-BASELINE-NEXT: orb %al, %cl 1460 ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1461 ; CHECK-BASELINE-NEXT: movb 17(%r14), %al 1462 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1463 ; CHECK-BASELINE-NEXT: andb %cl, %al 1464 ; CHECK-BASELINE-NEXT: notb %cl 1465 ; CHECK-BASELINE-NEXT: andb 17(%rdx), %cl 1466 ; CHECK-BASELINE-NEXT: orb %al, %cl 1467 ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1468 ; CHECK-BASELINE-NEXT: movb 16(%r14), %al 1469 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1470 ; CHECK-BASELINE-NEXT: andb %cl, %al 1471 ; CHECK-BASELINE-NEXT: notb %cl 1472 ; CHECK-BASELINE-NEXT: movq %rdx, %rbx 1473 ; CHECK-BASELINE-NEXT: andb 16(%rdx), %cl 1474 
; CHECK-BASELINE-NEXT: orb %al, %cl 1475 ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1476 ; CHECK-BASELINE-NEXT: movb 15(%r14), %al 1477 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1478 ; CHECK-BASELINE-NEXT: andb %cl, %al 1479 ; CHECK-BASELINE-NEXT: notb %cl 1480 ; CHECK-BASELINE-NEXT: andb 15(%rdx), %cl 1481 ; CHECK-BASELINE-NEXT: orb %al, %cl 1482 ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1483 ; CHECK-BASELINE-NEXT: movb 14(%r15), %cl 1484 ; CHECK-BASELINE-NEXT: movb 14(%r14), %al 1485 ; CHECK-BASELINE-NEXT: andb %cl, %al 1486 ; CHECK-BASELINE-NEXT: notb %cl 1487 ; CHECK-BASELINE-NEXT: andb 14(%rdx), %cl 1488 ; CHECK-BASELINE-NEXT: orb %al, %cl 1489 ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1490 ; CHECK-BASELINE-NEXT: movb 13(%r15), %cl 1491 ; CHECK-BASELINE-NEXT: movb 13(%r14), %al 1492 ; CHECK-BASELINE-NEXT: andb %cl, %al 1493 ; CHECK-BASELINE-NEXT: notb %cl 1494 ; CHECK-BASELINE-NEXT: andb 13(%rdx), %cl 1495 ; CHECK-BASELINE-NEXT: orb %al, %cl 1496 ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1497 ; CHECK-BASELINE-NEXT: movb 12(%r15), %cl 1498 ; CHECK-BASELINE-NEXT: movb 12(%r14), %al 1499 ; CHECK-BASELINE-NEXT: andb %cl, %al 1500 ; CHECK-BASELINE-NEXT: notb %cl 1501 ; CHECK-BASELINE-NEXT: andb 12(%rdx), %cl 1502 ; CHECK-BASELINE-NEXT: orb %al, %cl 1503 ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1504 ; CHECK-BASELINE-NEXT: movb 11(%r15), %r13b 1505 ; CHECK-BASELINE-NEXT: movb 11(%r14), %al 1506 ; CHECK-BASELINE-NEXT: andb %r13b, %al 1507 ; CHECK-BASELINE-NEXT: notb %r13b 1508 ; CHECK-BASELINE-NEXT: andb 11(%rdx), %r13b 1509 ; CHECK-BASELINE-NEXT: orb %al, %r13b 1510 ; CHECK-BASELINE-NEXT: movb 10(%r15), %r12b 1511 ; CHECK-BASELINE-NEXT: movb 10(%r14), %al 1512 ; CHECK-BASELINE-NEXT: andb %r12b, %al 1513 ; CHECK-BASELINE-NEXT: notb %r12b 1514 ; CHECK-BASELINE-NEXT: andb 10(%rdx), 
%r12b 1515 ; CHECK-BASELINE-NEXT: orb %al, %r12b 1516 ; CHECK-BASELINE-NEXT: movb 9(%r15), %bpl 1517 ; CHECK-BASELINE-NEXT: movb 9(%r14), %al 1518 ; CHECK-BASELINE-NEXT: andb %bpl, %al 1519 ; CHECK-BASELINE-NEXT: notb %bpl 1520 ; CHECK-BASELINE-NEXT: andb 9(%rdx), %bpl 1521 ; CHECK-BASELINE-NEXT: orb %al, %bpl 1522 ; CHECK-BASELINE-NEXT: movb 8(%r15), %r11b 1523 ; CHECK-BASELINE-NEXT: movb 8(%r14), %al 1524 ; CHECK-BASELINE-NEXT: andb %r11b, %al 1525 ; CHECK-BASELINE-NEXT: notb %r11b 1526 ; CHECK-BASELINE-NEXT: andb 8(%rdx), %r11b 1527 ; CHECK-BASELINE-NEXT: orb %al, %r11b 1528 ; CHECK-BASELINE-NEXT: movb 7(%r15), %r10b 1529 ; CHECK-BASELINE-NEXT: movb 7(%r14), %al 1530 ; CHECK-BASELINE-NEXT: andb %r10b, %al 1531 ; CHECK-BASELINE-NEXT: notb %r10b 1532 ; CHECK-BASELINE-NEXT: andb 7(%rdx), %r10b 1533 ; CHECK-BASELINE-NEXT: orb %al, %r10b 1534 ; CHECK-BASELINE-NEXT: movb 6(%r15), %r9b 1535 ; CHECK-BASELINE-NEXT: movb 6(%r14), %al 1536 ; CHECK-BASELINE-NEXT: andb %r9b, %al 1537 ; CHECK-BASELINE-NEXT: notb %r9b 1538 ; CHECK-BASELINE-NEXT: andb 6(%rdx), %r9b 1539 ; CHECK-BASELINE-NEXT: orb %al, %r9b 1540 ; CHECK-BASELINE-NEXT: movb 5(%r15), %r8b 1541 ; CHECK-BASELINE-NEXT: movb 5(%r14), %al 1542 ; CHECK-BASELINE-NEXT: andb %r8b, %al 1543 ; CHECK-BASELINE-NEXT: notb %r8b 1544 ; CHECK-BASELINE-NEXT: andb 5(%rdx), %r8b 1545 ; CHECK-BASELINE-NEXT: orb %al, %r8b 1546 ; CHECK-BASELINE-NEXT: movb 4(%r15), %dil 1547 ; CHECK-BASELINE-NEXT: movb 4(%r14), %al 1548 ; CHECK-BASELINE-NEXT: andb %dil, %al 1549 ; CHECK-BASELINE-NEXT: notb %dil 1550 ; CHECK-BASELINE-NEXT: andb 4(%rdx), %dil 1551 ; CHECK-BASELINE-NEXT: orb %al, %dil 1552 ; CHECK-BASELINE-NEXT: movb 3(%r15), %sil 1553 ; CHECK-BASELINE-NEXT: movb 3(%r14), %al 1554 ; CHECK-BASELINE-NEXT: andb %sil, %al 1555 ; CHECK-BASELINE-NEXT: notb %sil 1556 ; CHECK-BASELINE-NEXT: andb 3(%rdx), %sil 1557 ; CHECK-BASELINE-NEXT: orb %al, %sil 1558 ; CHECK-BASELINE-NEXT: movb 2(%r15), %dl 1559 ; CHECK-BASELINE-NEXT: movb 2(%r14), %al 1560 ; 
CHECK-BASELINE-NEXT: andb %dl, %al 1561 ; CHECK-BASELINE-NEXT: notb %dl 1562 ; CHECK-BASELINE-NEXT: andb 2(%rbx), %dl 1563 ; CHECK-BASELINE-NEXT: orb %al, %dl 1564 ; CHECK-BASELINE-NEXT: movb 1(%r15), %al 1565 ; CHECK-BASELINE-NEXT: movb 1(%r14), %cl 1566 ; CHECK-BASELINE-NEXT: andb %al, %cl 1567 ; CHECK-BASELINE-NEXT: notb %al 1568 ; CHECK-BASELINE-NEXT: andb 1(%rbx), %al 1569 ; CHECK-BASELINE-NEXT: orb %cl, %al 1570 ; CHECK-BASELINE-NEXT: movb (%r15), %r15b 1571 ; CHECK-BASELINE-NEXT: movb (%r14), %r14b 1572 ; CHECK-BASELINE-NEXT: andb %r15b, %r14b 1573 ; CHECK-BASELINE-NEXT: notb %r15b 1574 ; CHECK-BASELINE-NEXT: andb (%rbx), %r15b 1575 ; CHECK-BASELINE-NEXT: orb %r14b, %r15b 1576 ; CHECK-BASELINE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload 1577 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1578 ; CHECK-BASELINE-NEXT: movb %bl, 31(%rcx) 1579 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1580 ; CHECK-BASELINE-NEXT: movb %bl, 30(%rcx) 1581 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1582 ; CHECK-BASELINE-NEXT: movb %bl, 29(%rcx) 1583 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1584 ; CHECK-BASELINE-NEXT: movb %bl, 28(%rcx) 1585 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1586 ; CHECK-BASELINE-NEXT: movb %bl, 27(%rcx) 1587 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1588 ; CHECK-BASELINE-NEXT: movb %bl, 26(%rcx) 1589 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1590 ; CHECK-BASELINE-NEXT: movb %bl, 25(%rcx) 1591 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1592 ; CHECK-BASELINE-NEXT: movb %bl, 24(%rcx) 1593 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1594 ; CHECK-BASELINE-NEXT: movb %bl, 23(%rcx) 1595 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1596 ; 
CHECK-BASELINE-NEXT: movb %bl, 22(%rcx) 1597 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1598 ; CHECK-BASELINE-NEXT: movb %bl, 21(%rcx) 1599 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1600 ; CHECK-BASELINE-NEXT: movb %bl, 20(%rcx) 1601 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1602 ; CHECK-BASELINE-NEXT: movb %bl, 19(%rcx) 1603 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1604 ; CHECK-BASELINE-NEXT: movb %bl, 18(%rcx) 1605 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1606 ; CHECK-BASELINE-NEXT: movb %bl, 17(%rcx) 1607 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1608 ; CHECK-BASELINE-NEXT: movb %bl, 16(%rcx) 1609 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1610 ; CHECK-BASELINE-NEXT: movb %bl, 15(%rcx) 1611 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1612 ; CHECK-BASELINE-NEXT: movb %bl, 14(%rcx) 1613 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1614 ; CHECK-BASELINE-NEXT: movb %bl, 13(%rcx) 1615 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1616 ; CHECK-BASELINE-NEXT: movb %bl, 12(%rcx) 1617 ; CHECK-BASELINE-NEXT: movb %r13b, 11(%rcx) 1618 ; CHECK-BASELINE-NEXT: movb %r12b, 10(%rcx) 1619 ; CHECK-BASELINE-NEXT: movb %bpl, 9(%rcx) 1620 ; CHECK-BASELINE-NEXT: movb %r11b, 8(%rcx) 1621 ; CHECK-BASELINE-NEXT: movb %r10b, 7(%rcx) 1622 ; CHECK-BASELINE-NEXT: movb %r9b, 6(%rcx) 1623 ; CHECK-BASELINE-NEXT: movb %r8b, 5(%rcx) 1624 ; CHECK-BASELINE-NEXT: movb %dil, 4(%rcx) 1625 ; CHECK-BASELINE-NEXT: movb %sil, 3(%rcx) 1626 ; CHECK-BASELINE-NEXT: movb %dl, 2(%rcx) 1627 ; CHECK-BASELINE-NEXT: movb %al, 1(%rcx) 1628 ; CHECK-BASELINE-NEXT: movb %r15b, (%rcx) 1629 ; CHECK-BASELINE-NEXT: movq %rcx, %rax 1630 ; CHECK-BASELINE-NEXT: popq %rbx 1631 ; CHECK-BASELINE-NEXT: popq %r12 1632 ; 
CHECK-BASELINE-NEXT: popq %r13 1633 ; CHECK-BASELINE-NEXT: popq %r14 1634 ; CHECK-BASELINE-NEXT: popq %r15 1635 ; CHECK-BASELINE-NEXT: popq %rbp 1636 ; CHECK-BASELINE-NEXT: retq 1637 ; 1638 ; CHECK-SSE1-LABEL: out_v32i8: 1639 ; CHECK-SSE1: # %bb.0: 1640 ; CHECK-SSE1-NEXT: pushq %rbp 1641 ; CHECK-SSE1-NEXT: pushq %r15 1642 ; CHECK-SSE1-NEXT: pushq %r14 1643 ; CHECK-SSE1-NEXT: pushq %r13 1644 ; CHECK-SSE1-NEXT: pushq %r12 1645 ; CHECK-SSE1-NEXT: pushq %rbx 1646 ; CHECK-SSE1-NEXT: movq %rcx, %r15 1647 ; CHECK-SSE1-NEXT: movq %rsi, %r14 1648 ; CHECK-SSE1-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1649 ; CHECK-SSE1-NEXT: movb 15(%rcx), %al 1650 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1651 ; CHECK-SSE1-NEXT: movb 16(%rcx), %al 1652 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1653 ; CHECK-SSE1-NEXT: movb 17(%rcx), %al 1654 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1655 ; CHECK-SSE1-NEXT: movb 18(%rcx), %al 1656 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1657 ; CHECK-SSE1-NEXT: movb 19(%rcx), %al 1658 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1659 ; CHECK-SSE1-NEXT: movb 20(%rcx), %al 1660 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1661 ; CHECK-SSE1-NEXT: movb 21(%rcx), %r12b 1662 ; CHECK-SSE1-NEXT: movb 22(%rcx), %r10b 1663 ; CHECK-SSE1-NEXT: movb 23(%rcx), %r11b 1664 ; CHECK-SSE1-NEXT: movb 24(%rcx), %bpl 1665 ; CHECK-SSE1-NEXT: movb 25(%rcx), %r13b 1666 ; CHECK-SSE1-NEXT: movb 26(%rcx), %r9b 1667 ; CHECK-SSE1-NEXT: movb 27(%rcx), %r8b 1668 ; CHECK-SSE1-NEXT: movb 28(%rcx), %dil 1669 ; CHECK-SSE1-NEXT: movb 29(%rcx), %sil 1670 ; CHECK-SSE1-NEXT: movb 30(%rcx), %bl 1671 ; CHECK-SSE1-NEXT: movb 31(%rcx), %al 1672 ; CHECK-SSE1-NEXT: movb 31(%r14), %cl 1673 ; CHECK-SSE1-NEXT: andb %al, %cl 1674 ; CHECK-SSE1-NEXT: notb %al 1675 ; CHECK-SSE1-NEXT: andb 31(%rdx), %al 1676 ; CHECK-SSE1-NEXT: orb %cl, %al 
1677 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1678 ; CHECK-SSE1-NEXT: movb 30(%r14), %al 1679 ; CHECK-SSE1-NEXT: andb %bl, %al 1680 ; CHECK-SSE1-NEXT: notb %bl 1681 ; CHECK-SSE1-NEXT: andb 30(%rdx), %bl 1682 ; CHECK-SSE1-NEXT: orb %al, %bl 1683 ; CHECK-SSE1-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1684 ; CHECK-SSE1-NEXT: movb 29(%r14), %al 1685 ; CHECK-SSE1-NEXT: andb %sil, %al 1686 ; CHECK-SSE1-NEXT: notb %sil 1687 ; CHECK-SSE1-NEXT: andb 29(%rdx), %sil 1688 ; CHECK-SSE1-NEXT: orb %al, %sil 1689 ; CHECK-SSE1-NEXT: movb %sil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1690 ; CHECK-SSE1-NEXT: movb 28(%r14), %al 1691 ; CHECK-SSE1-NEXT: andb %dil, %al 1692 ; CHECK-SSE1-NEXT: notb %dil 1693 ; CHECK-SSE1-NEXT: andb 28(%rdx), %dil 1694 ; CHECK-SSE1-NEXT: orb %al, %dil 1695 ; CHECK-SSE1-NEXT: movb %dil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1696 ; CHECK-SSE1-NEXT: movb 27(%r14), %al 1697 ; CHECK-SSE1-NEXT: andb %r8b, %al 1698 ; CHECK-SSE1-NEXT: notb %r8b 1699 ; CHECK-SSE1-NEXT: andb 27(%rdx), %r8b 1700 ; CHECK-SSE1-NEXT: orb %al, %r8b 1701 ; CHECK-SSE1-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1702 ; CHECK-SSE1-NEXT: movb 26(%r14), %al 1703 ; CHECK-SSE1-NEXT: andb %r9b, %al 1704 ; CHECK-SSE1-NEXT: notb %r9b 1705 ; CHECK-SSE1-NEXT: andb 26(%rdx), %r9b 1706 ; CHECK-SSE1-NEXT: orb %al, %r9b 1707 ; CHECK-SSE1-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1708 ; CHECK-SSE1-NEXT: movb 25(%r14), %al 1709 ; CHECK-SSE1-NEXT: andb %r13b, %al 1710 ; CHECK-SSE1-NEXT: notb %r13b 1711 ; CHECK-SSE1-NEXT: andb 25(%rdx), %r13b 1712 ; CHECK-SSE1-NEXT: orb %al, %r13b 1713 ; CHECK-SSE1-NEXT: movb %r13b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1714 ; CHECK-SSE1-NEXT: movb 24(%r14), %al 1715 ; CHECK-SSE1-NEXT: andb %bpl, %al 1716 ; CHECK-SSE1-NEXT: notb %bpl 1717 ; CHECK-SSE1-NEXT: andb 24(%rdx), %bpl 1718 ; CHECK-SSE1-NEXT: orb %al, %bpl 1719 ; CHECK-SSE1-NEXT: movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1720 ; 
CHECK-SSE1-NEXT: movb 23(%r14), %al 1721 ; CHECK-SSE1-NEXT: andb %r11b, %al 1722 ; CHECK-SSE1-NEXT: notb %r11b 1723 ; CHECK-SSE1-NEXT: andb 23(%rdx), %r11b 1724 ; CHECK-SSE1-NEXT: orb %al, %r11b 1725 ; CHECK-SSE1-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1726 ; CHECK-SSE1-NEXT: movb 22(%r14), %al 1727 ; CHECK-SSE1-NEXT: andb %r10b, %al 1728 ; CHECK-SSE1-NEXT: notb %r10b 1729 ; CHECK-SSE1-NEXT: andb 22(%rdx), %r10b 1730 ; CHECK-SSE1-NEXT: orb %al, %r10b 1731 ; CHECK-SSE1-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1732 ; CHECK-SSE1-NEXT: movb 21(%r14), %al 1733 ; CHECK-SSE1-NEXT: andb %r12b, %al 1734 ; CHECK-SSE1-NEXT: notb %r12b 1735 ; CHECK-SSE1-NEXT: andb 21(%rdx), %r12b 1736 ; CHECK-SSE1-NEXT: orb %al, %r12b 1737 ; CHECK-SSE1-NEXT: movb %r12b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1738 ; CHECK-SSE1-NEXT: movb 20(%r14), %al 1739 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1740 ; CHECK-SSE1-NEXT: andb %cl, %al 1741 ; CHECK-SSE1-NEXT: notb %cl 1742 ; CHECK-SSE1-NEXT: andb 20(%rdx), %cl 1743 ; CHECK-SSE1-NEXT: orb %al, %cl 1744 ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1745 ; CHECK-SSE1-NEXT: movb 19(%r14), %al 1746 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1747 ; CHECK-SSE1-NEXT: andb %cl, %al 1748 ; CHECK-SSE1-NEXT: notb %cl 1749 ; CHECK-SSE1-NEXT: andb 19(%rdx), %cl 1750 ; CHECK-SSE1-NEXT: orb %al, %cl 1751 ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1752 ; CHECK-SSE1-NEXT: movb 18(%r14), %al 1753 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1754 ; CHECK-SSE1-NEXT: andb %cl, %al 1755 ; CHECK-SSE1-NEXT: notb %cl 1756 ; CHECK-SSE1-NEXT: andb 18(%rdx), %cl 1757 ; CHECK-SSE1-NEXT: orb %al, %cl 1758 ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1759 ; CHECK-SSE1-NEXT: movb 17(%r14), %al 1760 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1761 ; CHECK-SSE1-NEXT: 
andb %cl, %al 1762 ; CHECK-SSE1-NEXT: notb %cl 1763 ; CHECK-SSE1-NEXT: andb 17(%rdx), %cl 1764 ; CHECK-SSE1-NEXT: orb %al, %cl 1765 ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1766 ; CHECK-SSE1-NEXT: movb 16(%r14), %al 1767 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1768 ; CHECK-SSE1-NEXT: andb %cl, %al 1769 ; CHECK-SSE1-NEXT: notb %cl 1770 ; CHECK-SSE1-NEXT: movq %rdx, %rbx 1771 ; CHECK-SSE1-NEXT: andb 16(%rdx), %cl 1772 ; CHECK-SSE1-NEXT: orb %al, %cl 1773 ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1774 ; CHECK-SSE1-NEXT: movb 15(%r14), %al 1775 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1776 ; CHECK-SSE1-NEXT: andb %cl, %al 1777 ; CHECK-SSE1-NEXT: notb %cl 1778 ; CHECK-SSE1-NEXT: andb 15(%rdx), %cl 1779 ; CHECK-SSE1-NEXT: orb %al, %cl 1780 ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1781 ; CHECK-SSE1-NEXT: movb 14(%r15), %cl 1782 ; CHECK-SSE1-NEXT: movb 14(%r14), %al 1783 ; CHECK-SSE1-NEXT: andb %cl, %al 1784 ; CHECK-SSE1-NEXT: notb %cl 1785 ; CHECK-SSE1-NEXT: andb 14(%rdx), %cl 1786 ; CHECK-SSE1-NEXT: orb %al, %cl 1787 ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1788 ; CHECK-SSE1-NEXT: movb 13(%r15), %cl 1789 ; CHECK-SSE1-NEXT: movb 13(%r14), %al 1790 ; CHECK-SSE1-NEXT: andb %cl, %al 1791 ; CHECK-SSE1-NEXT: notb %cl 1792 ; CHECK-SSE1-NEXT: andb 13(%rdx), %cl 1793 ; CHECK-SSE1-NEXT: orb %al, %cl 1794 ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1795 ; CHECK-SSE1-NEXT: movb 12(%r15), %cl 1796 ; CHECK-SSE1-NEXT: movb 12(%r14), %al 1797 ; CHECK-SSE1-NEXT: andb %cl, %al 1798 ; CHECK-SSE1-NEXT: notb %cl 1799 ; CHECK-SSE1-NEXT: andb 12(%rdx), %cl 1800 ; CHECK-SSE1-NEXT: orb %al, %cl 1801 ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1802 ; CHECK-SSE1-NEXT: movb 11(%r15), %r13b 1803 ; CHECK-SSE1-NEXT: movb 11(%r14), %al 1804 ; CHECK-SSE1-NEXT: andb %r13b, %al 1805 ; 
CHECK-SSE1-NEXT: notb %r13b 1806 ; CHECK-SSE1-NEXT: andb 11(%rdx), %r13b 1807 ; CHECK-SSE1-NEXT: orb %al, %r13b 1808 ; CHECK-SSE1-NEXT: movb 10(%r15), %r12b 1809 ; CHECK-SSE1-NEXT: movb 10(%r14), %al 1810 ; CHECK-SSE1-NEXT: andb %r12b, %al 1811 ; CHECK-SSE1-NEXT: notb %r12b 1812 ; CHECK-SSE1-NEXT: andb 10(%rdx), %r12b 1813 ; CHECK-SSE1-NEXT: orb %al, %r12b 1814 ; CHECK-SSE1-NEXT: movb 9(%r15), %bpl 1815 ; CHECK-SSE1-NEXT: movb 9(%r14), %al 1816 ; CHECK-SSE1-NEXT: andb %bpl, %al 1817 ; CHECK-SSE1-NEXT: notb %bpl 1818 ; CHECK-SSE1-NEXT: andb 9(%rdx), %bpl 1819 ; CHECK-SSE1-NEXT: orb %al, %bpl 1820 ; CHECK-SSE1-NEXT: movb 8(%r15), %r11b 1821 ; CHECK-SSE1-NEXT: movb 8(%r14), %al 1822 ; CHECK-SSE1-NEXT: andb %r11b, %al 1823 ; CHECK-SSE1-NEXT: notb %r11b 1824 ; CHECK-SSE1-NEXT: andb 8(%rdx), %r11b 1825 ; CHECK-SSE1-NEXT: orb %al, %r11b 1826 ; CHECK-SSE1-NEXT: movb 7(%r15), %r10b 1827 ; CHECK-SSE1-NEXT: movb 7(%r14), %al 1828 ; CHECK-SSE1-NEXT: andb %r10b, %al 1829 ; CHECK-SSE1-NEXT: notb %r10b 1830 ; CHECK-SSE1-NEXT: andb 7(%rdx), %r10b 1831 ; CHECK-SSE1-NEXT: orb %al, %r10b 1832 ; CHECK-SSE1-NEXT: movb 6(%r15), %r9b 1833 ; CHECK-SSE1-NEXT: movb 6(%r14), %al 1834 ; CHECK-SSE1-NEXT: andb %r9b, %al 1835 ; CHECK-SSE1-NEXT: notb %r9b 1836 ; CHECK-SSE1-NEXT: andb 6(%rdx), %r9b 1837 ; CHECK-SSE1-NEXT: orb %al, %r9b 1838 ; CHECK-SSE1-NEXT: movb 5(%r15), %r8b 1839 ; CHECK-SSE1-NEXT: movb 5(%r14), %al 1840 ; CHECK-SSE1-NEXT: andb %r8b, %al 1841 ; CHECK-SSE1-NEXT: notb %r8b 1842 ; CHECK-SSE1-NEXT: andb 5(%rdx), %r8b 1843 ; CHECK-SSE1-NEXT: orb %al, %r8b 1844 ; CHECK-SSE1-NEXT: movb 4(%r15), %dil 1845 ; CHECK-SSE1-NEXT: movb 4(%r14), %al 1846 ; CHECK-SSE1-NEXT: andb %dil, %al 1847 ; CHECK-SSE1-NEXT: notb %dil 1848 ; CHECK-SSE1-NEXT: andb 4(%rdx), %dil 1849 ; CHECK-SSE1-NEXT: orb %al, %dil 1850 ; CHECK-SSE1-NEXT: movb 3(%r15), %sil 1851 ; CHECK-SSE1-NEXT: movb 3(%r14), %al 1852 ; CHECK-SSE1-NEXT: andb %sil, %al 1853 ; CHECK-SSE1-NEXT: notb %sil 1854 ; CHECK-SSE1-NEXT: andb 3(%rdx), 
%sil 1855 ; CHECK-SSE1-NEXT: orb %al, %sil 1856 ; CHECK-SSE1-NEXT: movb 2(%r15), %dl 1857 ; CHECK-SSE1-NEXT: movb 2(%r14), %al 1858 ; CHECK-SSE1-NEXT: andb %dl, %al 1859 ; CHECK-SSE1-NEXT: notb %dl 1860 ; CHECK-SSE1-NEXT: andb 2(%rbx), %dl 1861 ; CHECK-SSE1-NEXT: orb %al, %dl 1862 ; CHECK-SSE1-NEXT: movb 1(%r15), %al 1863 ; CHECK-SSE1-NEXT: movb 1(%r14), %cl 1864 ; CHECK-SSE1-NEXT: andb %al, %cl 1865 ; CHECK-SSE1-NEXT: notb %al 1866 ; CHECK-SSE1-NEXT: andb 1(%rbx), %al 1867 ; CHECK-SSE1-NEXT: orb %cl, %al 1868 ; CHECK-SSE1-NEXT: movb (%r15), %r15b 1869 ; CHECK-SSE1-NEXT: movb (%r14), %r14b 1870 ; CHECK-SSE1-NEXT: andb %r15b, %r14b 1871 ; CHECK-SSE1-NEXT: notb %r15b 1872 ; CHECK-SSE1-NEXT: andb (%rbx), %r15b 1873 ; CHECK-SSE1-NEXT: orb %r14b, %r15b 1874 ; CHECK-SSE1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload 1875 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1876 ; CHECK-SSE1-NEXT: movb %bl, 31(%rcx) 1877 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1878 ; CHECK-SSE1-NEXT: movb %bl, 30(%rcx) 1879 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1880 ; CHECK-SSE1-NEXT: movb %bl, 29(%rcx) 1881 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1882 ; CHECK-SSE1-NEXT: movb %bl, 28(%rcx) 1883 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1884 ; CHECK-SSE1-NEXT: movb %bl, 27(%rcx) 1885 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1886 ; CHECK-SSE1-NEXT: movb %bl, 26(%rcx) 1887 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1888 ; CHECK-SSE1-NEXT: movb %bl, 25(%rcx) 1889 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1890 ; CHECK-SSE1-NEXT: movb %bl, 24(%rcx) 1891 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1892 ; CHECK-SSE1-NEXT: movb %bl, 23(%rcx) 1893 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1894 ; CHECK-SSE1-NEXT: movb 
%bl, 22(%rcx) 1895 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1896 ; CHECK-SSE1-NEXT: movb %bl, 21(%rcx) 1897 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1898 ; CHECK-SSE1-NEXT: movb %bl, 20(%rcx) 1899 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1900 ; CHECK-SSE1-NEXT: movb %bl, 19(%rcx) 1901 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1902 ; CHECK-SSE1-NEXT: movb %bl, 18(%rcx) 1903 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1904 ; CHECK-SSE1-NEXT: movb %bl, 17(%rcx) 1905 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1906 ; CHECK-SSE1-NEXT: movb %bl, 16(%rcx) 1907 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1908 ; CHECK-SSE1-NEXT: movb %bl, 15(%rcx) 1909 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1910 ; CHECK-SSE1-NEXT: movb %bl, 14(%rcx) 1911 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1912 ; CHECK-SSE1-NEXT: movb %bl, 13(%rcx) 1913 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1914 ; CHECK-SSE1-NEXT: movb %bl, 12(%rcx) 1915 ; CHECK-SSE1-NEXT: movb %r13b, 11(%rcx) 1916 ; CHECK-SSE1-NEXT: movb %r12b, 10(%rcx) 1917 ; CHECK-SSE1-NEXT: movb %bpl, 9(%rcx) 1918 ; CHECK-SSE1-NEXT: movb %r11b, 8(%rcx) 1919 ; CHECK-SSE1-NEXT: movb %r10b, 7(%rcx) 1920 ; CHECK-SSE1-NEXT: movb %r9b, 6(%rcx) 1921 ; CHECK-SSE1-NEXT: movb %r8b, 5(%rcx) 1922 ; CHECK-SSE1-NEXT: movb %dil, 4(%rcx) 1923 ; CHECK-SSE1-NEXT: movb %sil, 3(%rcx) 1924 ; CHECK-SSE1-NEXT: movb %dl, 2(%rcx) 1925 ; CHECK-SSE1-NEXT: movb %al, 1(%rcx) 1926 ; CHECK-SSE1-NEXT: movb %r15b, (%rcx) 1927 ; CHECK-SSE1-NEXT: movq %rcx, %rax 1928 ; CHECK-SSE1-NEXT: popq %rbx 1929 ; CHECK-SSE1-NEXT: popq %r12 1930 ; CHECK-SSE1-NEXT: popq %r13 1931 ; CHECK-SSE1-NEXT: popq %r14 1932 ; CHECK-SSE1-NEXT: popq %r15 1933 ; CHECK-SSE1-NEXT: popq %rbp 1934 ; CHECK-SSE1-NEXT: retq 1935 ; 1936 
; CHECK-SSE2-LABEL: out_v32i8: 1937 ; CHECK-SSE2: # %bb.0: 1938 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 1939 ; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 1940 ; CHECK-SSE2-NEXT: movaps 16(%rdi), %xmm2 1941 ; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 1942 ; CHECK-SSE2-NEXT: movaps (%rdi), %xmm3 1943 ; CHECK-SSE2-NEXT: andps %xmm0, %xmm3 1944 ; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm1 1945 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 1946 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 1947 ; CHECK-SSE2-NEXT: orps %xmm3, %xmm0 1948 ; CHECK-SSE2-NEXT: retq 1949 ; 1950 ; CHECK-XOP-LABEL: out_v32i8: 1951 ; CHECK-XOP: # %bb.0: 1952 ; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 1953 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 1954 ; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 1955 ; CHECK-XOP-NEXT: retq 1956 %x = load <32 x i8>, <32 x i8> *%px, align 32 1957 %y = load <32 x i8>, <32 x i8> *%py, align 32 1958 %mask = load <32 x i8>, <32 x i8> *%pmask, align 32 1959 %mx = and <32 x i8> %x, %mask 1960 %notmask = xor <32 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1961 %my = and <32 x i8> %y, %notmask 1962 %r = or <32 x i8> %mx, %my 1963 ret <32 x i8> %r 1964 } 1965
; out_v16i16 loads three <16 x i16> vectors (%px, %py, %pmask, each with
; align 32) and returns the bitwise masked merge (x & mask) | (y & ~mask), with
; ~mask written as xor against all-ones. The assertions that follow it are
; autogenerated (see the note on the first line of this file); regenerate them
; instead of editing by hand. What they expect per configuration:
;   - SSE disabled, or SSE1 only - every lane is merged with scalar
;     and/not/and/or on general-purpose registers, and the result is stored
;     through the pointer received in %rdi, which is also copied to %rax
;     before returning.
;   - SSE2 - two 128-bit halves, each merged with andps/andnps/orps.
;   - XOP - one 256-bit vpcmov.
1966 define <16 x i16> @out_v16i16(<16 x i16> *%px, <16 x i16> *%py, <16 x i16> *%pmask) nounwind { 1967 ; CHECK-BASELINE-LABEL: out_v16i16: 1968 ; CHECK-BASELINE: # %bb.0: 1969 ; CHECK-BASELINE-NEXT: pushq %rbp 1970 ; CHECK-BASELINE-NEXT: pushq %r15 1971 ; CHECK-BASELINE-NEXT: pushq %r14 1972 ; CHECK-BASELINE-NEXT: pushq %r13 1973 ; CHECK-BASELINE-NEXT: pushq %r12 1974 ; CHECK-BASELINE-NEXT: pushq %rbx 1975 ; CHECK-BASELINE-NEXT: movq %rcx, %r9 1976 ; CHECK-BASELINE-NEXT: movq %rdx, %r10 1977 ; CHECK-BASELINE-NEXT: movq %rsi, %r8 1978 ; CHECK-BASELINE-NEXT: movq %rdi, %r11 1979 ; CHECK-BASELINE-NEXT: movl 12(%rcx), %eax 1980 ; CHECK-BASELINE-NEXT: 
movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1981 ; CHECK-BASELINE-NEXT: movzwl 14(%rcx), %edx 1982 ; CHECK-BASELINE-NEXT: movl 16(%rcx), %esi 1983 ; CHECK-BASELINE-NEXT: movzwl 18(%rcx), %edi 1984 ; CHECK-BASELINE-NEXT: movl 20(%rcx), %ecx 1985 ; CHECK-BASELINE-NEXT: movzwl 22(%r9), %r15d 1986 ; CHECK-BASELINE-NEXT: movl 24(%r9), %r12d 1987 ; CHECK-BASELINE-NEXT: movzwl 26(%r9), %r14d 1988 ; CHECK-BASELINE-NEXT: movl 28(%r9), %ebx 1989 ; CHECK-BASELINE-NEXT: movzwl 30(%r9), %ebp 1990 ; CHECK-BASELINE-NEXT: movzwl 30(%r8), %r13d 1991 ; CHECK-BASELINE-NEXT: andw %bp, %r13w 1992 ; CHECK-BASELINE-NEXT: notl %ebp 1993 ; CHECK-BASELINE-NEXT: andw 30(%r10), %bp 1994 ; CHECK-BASELINE-NEXT: orl %r13d, %ebp 1995 ; CHECK-BASELINE-NEXT: movzwl 28(%r8), %eax 1996 ; CHECK-BASELINE-NEXT: andw %bx, %ax 1997 ; CHECK-BASELINE-NEXT: notl %ebx 1998 ; CHECK-BASELINE-NEXT: andw 28(%r10), %bx 1999 ; CHECK-BASELINE-NEXT: orl %eax, %ebx 2000 ; CHECK-BASELINE-NEXT: movzwl 26(%r8), %eax 2001 ; CHECK-BASELINE-NEXT: andw %r14w, %ax 2002 ; CHECK-BASELINE-NEXT: notl %r14d 2003 ; CHECK-BASELINE-NEXT: andw 26(%r10), %r14w 2004 ; CHECK-BASELINE-NEXT: orl %eax, %r14d 2005 ; CHECK-BASELINE-NEXT: movzwl 24(%r8), %eax 2006 ; CHECK-BASELINE-NEXT: andw %r12w, %ax 2007 ; CHECK-BASELINE-NEXT: notl %r12d 2008 ; CHECK-BASELINE-NEXT: andw 24(%r10), %r12w 2009 ; CHECK-BASELINE-NEXT: orl %eax, %r12d 2010 ; CHECK-BASELINE-NEXT: movzwl 22(%r8), %eax 2011 ; CHECK-BASELINE-NEXT: andw %r15w, %ax 2012 ; CHECK-BASELINE-NEXT: notl %r15d 2013 ; CHECK-BASELINE-NEXT: andw 22(%r10), %r15w 2014 ; CHECK-BASELINE-NEXT: orl %eax, %r15d 2015 ; CHECK-BASELINE-NEXT: movzwl 20(%r8), %eax 2016 ; CHECK-BASELINE-NEXT: andw %cx, %ax 2017 ; CHECK-BASELINE-NEXT: notl %ecx 2018 ; CHECK-BASELINE-NEXT: andw 20(%r10), %cx 2019 ; CHECK-BASELINE-NEXT: orl %eax, %ecx 2020 ; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 2021 ; CHECK-BASELINE-NEXT: movzwl 18(%r8), %eax 2022 ; CHECK-BASELINE-NEXT: andw %di, %ax 
2023 ; CHECK-BASELINE-NEXT: notl %edi 2024 ; CHECK-BASELINE-NEXT: andw 18(%r10), %di 2025 ; CHECK-BASELINE-NEXT: orl %eax, %edi 2026 ; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 2027 ; CHECK-BASELINE-NEXT: movzwl 16(%r8), %eax 2028 ; CHECK-BASELINE-NEXT: andw %si, %ax 2029 ; CHECK-BASELINE-NEXT: notl %esi 2030 ; CHECK-BASELINE-NEXT: andw 16(%r10), %si 2031 ; CHECK-BASELINE-NEXT: orl %eax, %esi 2032 ; CHECK-BASELINE-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 2033 ; CHECK-BASELINE-NEXT: movzwl 14(%r8), %eax 2034 ; CHECK-BASELINE-NEXT: andw %dx, %ax 2035 ; CHECK-BASELINE-NEXT: notl %edx 2036 ; CHECK-BASELINE-NEXT: andw 14(%r10), %dx 2037 ; CHECK-BASELINE-NEXT: orl %eax, %edx 2038 ; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 2039 ; CHECK-BASELINE-NEXT: movzwl 12(%r8), %eax 2040 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 2041 ; CHECK-BASELINE-NEXT: andw %cx, %ax 2042 ; CHECK-BASELINE-NEXT: notl %ecx 2043 ; CHECK-BASELINE-NEXT: andw 12(%r10), %cx 2044 ; CHECK-BASELINE-NEXT: orl %eax, %ecx 2045 ; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 2046 ; CHECK-BASELINE-NEXT: movzwl 10(%r9), %r13d 2047 ; CHECK-BASELINE-NEXT: movzwl 10(%r8), %eax 2048 ; CHECK-BASELINE-NEXT: andw %r13w, %ax 2049 ; CHECK-BASELINE-NEXT: notl %r13d 2050 ; CHECK-BASELINE-NEXT: andw 10(%r10), %r13w 2051 ; CHECK-BASELINE-NEXT: orl %eax, %r13d 2052 ; CHECK-BASELINE-NEXT: movl 8(%r9), %edi 2053 ; CHECK-BASELINE-NEXT: movzwl 8(%r8), %eax 2054 ; CHECK-BASELINE-NEXT: andw %di, %ax 2055 ; CHECK-BASELINE-NEXT: notl %edi 2056 ; CHECK-BASELINE-NEXT: andw 8(%r10), %di 2057 ; CHECK-BASELINE-NEXT: orl %eax, %edi 2058 ; CHECK-BASELINE-NEXT: movzwl 6(%r9), %esi 2059 ; CHECK-BASELINE-NEXT: movzwl 6(%r8), %eax 2060 ; CHECK-BASELINE-NEXT: andw %si, %ax 2061 ; CHECK-BASELINE-NEXT: notl %esi 2062 ; CHECK-BASELINE-NEXT: andw 6(%r10), %si 2063 ; CHECK-BASELINE-NEXT: orl %eax, %esi 2064 ; 
CHECK-BASELINE-NEXT: movl 4(%r9), %edx 2065 ; CHECK-BASELINE-NEXT: movzwl 4(%r8), %eax 2066 ; CHECK-BASELINE-NEXT: andw %dx, %ax 2067 ; CHECK-BASELINE-NEXT: notl %edx 2068 ; CHECK-BASELINE-NEXT: andw 4(%r10), %dx 2069 ; CHECK-BASELINE-NEXT: orl %eax, %edx 2070 ; CHECK-BASELINE-NEXT: movzwl 2(%r9), %eax 2071 ; CHECK-BASELINE-NEXT: movzwl 2(%r8), %ecx 2072 ; CHECK-BASELINE-NEXT: andw %ax, %cx 2073 ; CHECK-BASELINE-NEXT: notl %eax 2074 ; CHECK-BASELINE-NEXT: andw 2(%r10), %ax 2075 ; CHECK-BASELINE-NEXT: orl %ecx, %eax 2076 ; CHECK-BASELINE-NEXT: movl (%r9), %r9d 2077 ; CHECK-BASELINE-NEXT: movzwl (%r8), %ecx 2078 ; CHECK-BASELINE-NEXT: andw %r9w, %cx 2079 ; CHECK-BASELINE-NEXT: notl %r9d 2080 ; CHECK-BASELINE-NEXT: andw (%r10), %r9w 2081 ; CHECK-BASELINE-NEXT: orl %ecx, %r9d 2082 ; CHECK-BASELINE-NEXT: movw %bp, 30(%r11) 2083 ; CHECK-BASELINE-NEXT: movw %bx, 28(%r11) 2084 ; CHECK-BASELINE-NEXT: movw %r14w, 26(%r11) 2085 ; CHECK-BASELINE-NEXT: movw %r12w, 24(%r11) 2086 ; CHECK-BASELINE-NEXT: movw %r15w, 22(%r11) 2087 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 2088 ; CHECK-BASELINE-NEXT: movw %cx, 20(%r11) 2089 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 2090 ; CHECK-BASELINE-NEXT: movw %cx, 18(%r11) 2091 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 2092 ; CHECK-BASELINE-NEXT: movw %cx, 16(%r11) 2093 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 2094 ; CHECK-BASELINE-NEXT: movw %cx, 14(%r11) 2095 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 2096 ; CHECK-BASELINE-NEXT: movw %cx, 12(%r11) 2097 ; CHECK-BASELINE-NEXT: movw %r13w, 10(%r11) 2098 ; CHECK-BASELINE-NEXT: movw %di, 8(%r11) 2099 ; CHECK-BASELINE-NEXT: movw %si, 6(%r11) 2100 ; CHECK-BASELINE-NEXT: movw %dx, 4(%r11) 2101 ; CHECK-BASELINE-NEXT: movw %ax, 2(%r11) 2102 ; CHECK-BASELINE-NEXT: movw %r9w, (%r11) 2103 ; CHECK-BASELINE-NEXT: movq %r11, %rax 2104 ; 
CHECK-BASELINE-NEXT: popq %rbx 2105 ; CHECK-BASELINE-NEXT: popq %r12 2106 ; CHECK-BASELINE-NEXT: popq %r13 2107 ; CHECK-BASELINE-NEXT: popq %r14 2108 ; CHECK-BASELINE-NEXT: popq %r15 2109 ; CHECK-BASELINE-NEXT: popq %rbp 2110 ; CHECK-BASELINE-NEXT: retq 2111 ; 2112 ; CHECK-SSE1-LABEL: out_v16i16: 2113 ; CHECK-SSE1: # %bb.0: 2114 ; CHECK-SSE1-NEXT: pushq %rbp 2115 ; CHECK-SSE1-NEXT: pushq %r15 2116 ; CHECK-SSE1-NEXT: pushq %r14 2117 ; CHECK-SSE1-NEXT: pushq %r13 2118 ; CHECK-SSE1-NEXT: pushq %r12 2119 ; CHECK-SSE1-NEXT: pushq %rbx 2120 ; CHECK-SSE1-NEXT: movq %rcx, %r9 2121 ; CHECK-SSE1-NEXT: movq %rdx, %r10 2122 ; CHECK-SSE1-NEXT: movq %rsi, %r8 2123 ; CHECK-SSE1-NEXT: movq %rdi, %r11 2124 ; CHECK-SSE1-NEXT: movl 12(%rcx), %eax 2125 ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 2126 ; CHECK-SSE1-NEXT: movzwl 14(%rcx), %edx 2127 ; CHECK-SSE1-NEXT: movl 16(%rcx), %esi 2128 ; CHECK-SSE1-NEXT: movzwl 18(%rcx), %edi 2129 ; CHECK-SSE1-NEXT: movl 20(%rcx), %ecx 2130 ; CHECK-SSE1-NEXT: movzwl 22(%r9), %r15d 2131 ; CHECK-SSE1-NEXT: movl 24(%r9), %r12d 2132 ; CHECK-SSE1-NEXT: movzwl 26(%r9), %r14d 2133 ; CHECK-SSE1-NEXT: movl 28(%r9), %ebx 2134 ; CHECK-SSE1-NEXT: movzwl 30(%r9), %ebp 2135 ; CHECK-SSE1-NEXT: movzwl 30(%r8), %r13d 2136 ; CHECK-SSE1-NEXT: andw %bp, %r13w 2137 ; CHECK-SSE1-NEXT: notl %ebp 2138 ; CHECK-SSE1-NEXT: andw 30(%r10), %bp 2139 ; CHECK-SSE1-NEXT: orl %r13d, %ebp 2140 ; CHECK-SSE1-NEXT: movzwl 28(%r8), %eax 2141 ; CHECK-SSE1-NEXT: andw %bx, %ax 2142 ; CHECK-SSE1-NEXT: notl %ebx 2143 ; CHECK-SSE1-NEXT: andw 28(%r10), %bx 2144 ; CHECK-SSE1-NEXT: orl %eax, %ebx 2145 ; CHECK-SSE1-NEXT: movzwl 26(%r8), %eax 2146 ; CHECK-SSE1-NEXT: andw %r14w, %ax 2147 ; CHECK-SSE1-NEXT: notl %r14d 2148 ; CHECK-SSE1-NEXT: andw 26(%r10), %r14w 2149 ; CHECK-SSE1-NEXT: orl %eax, %r14d 2150 ; CHECK-SSE1-NEXT: movzwl 24(%r8), %eax 2151 ; CHECK-SSE1-NEXT: andw %r12w, %ax 2152 ; CHECK-SSE1-NEXT: notl %r12d 2153 ; CHECK-SSE1-NEXT: andw 24(%r10), %r12w 2154 ; 
CHECK-SSE1-NEXT: orl %eax, %r12d 2155 ; CHECK-SSE1-NEXT: movzwl 22(%r8), %eax 2156 ; CHECK-SSE1-NEXT: andw %r15w, %ax 2157 ; CHECK-SSE1-NEXT: notl %r15d 2158 ; CHECK-SSE1-NEXT: andw 22(%r10), %r15w 2159 ; CHECK-SSE1-NEXT: orl %eax, %r15d 2160 ; CHECK-SSE1-NEXT: movzwl 20(%r8), %eax 2161 ; CHECK-SSE1-NEXT: andw %cx, %ax 2162 ; CHECK-SSE1-NEXT: notl %ecx 2163 ; CHECK-SSE1-NEXT: andw 20(%r10), %cx 2164 ; CHECK-SSE1-NEXT: orl %eax, %ecx 2165 ; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 2166 ; CHECK-SSE1-NEXT: movzwl 18(%r8), %eax 2167 ; CHECK-SSE1-NEXT: andw %di, %ax 2168 ; CHECK-SSE1-NEXT: notl %edi 2169 ; CHECK-SSE1-NEXT: andw 18(%r10), %di 2170 ; CHECK-SSE1-NEXT: orl %eax, %edi 2171 ; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 2172 ; CHECK-SSE1-NEXT: movzwl 16(%r8), %eax 2173 ; CHECK-SSE1-NEXT: andw %si, %ax 2174 ; CHECK-SSE1-NEXT: notl %esi 2175 ; CHECK-SSE1-NEXT: andw 16(%r10), %si 2176 ; CHECK-SSE1-NEXT: orl %eax, %esi 2177 ; CHECK-SSE1-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 2178 ; CHECK-SSE1-NEXT: movzwl 14(%r8), %eax 2179 ; CHECK-SSE1-NEXT: andw %dx, %ax 2180 ; CHECK-SSE1-NEXT: notl %edx 2181 ; CHECK-SSE1-NEXT: andw 14(%r10), %dx 2182 ; CHECK-SSE1-NEXT: orl %eax, %edx 2183 ; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 2184 ; CHECK-SSE1-NEXT: movzwl 12(%r8), %eax 2185 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 2186 ; CHECK-SSE1-NEXT: andw %cx, %ax 2187 ; CHECK-SSE1-NEXT: notl %ecx 2188 ; CHECK-SSE1-NEXT: andw 12(%r10), %cx 2189 ; CHECK-SSE1-NEXT: orl %eax, %ecx 2190 ; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 2191 ; CHECK-SSE1-NEXT: movzwl 10(%r9), %r13d 2192 ; CHECK-SSE1-NEXT: movzwl 10(%r8), %eax 2193 ; CHECK-SSE1-NEXT: andw %r13w, %ax 2194 ; CHECK-SSE1-NEXT: notl %r13d 2195 ; CHECK-SSE1-NEXT: andw 10(%r10), %r13w 2196 ; CHECK-SSE1-NEXT: orl %eax, %r13d 2197 ; CHECK-SSE1-NEXT: movl 8(%r9), %edi 2198 ; 
CHECK-SSE1-NEXT: movzwl 8(%r8), %eax 2199 ; CHECK-SSE1-NEXT: andw %di, %ax 2200 ; CHECK-SSE1-NEXT: notl %edi 2201 ; CHECK-SSE1-NEXT: andw 8(%r10), %di 2202 ; CHECK-SSE1-NEXT: orl %eax, %edi 2203 ; CHECK-SSE1-NEXT: movzwl 6(%r9), %esi 2204 ; CHECK-SSE1-NEXT: movzwl 6(%r8), %eax 2205 ; CHECK-SSE1-NEXT: andw %si, %ax 2206 ; CHECK-SSE1-NEXT: notl %esi 2207 ; CHECK-SSE1-NEXT: andw 6(%r10), %si 2208 ; CHECK-SSE1-NEXT: orl %eax, %esi 2209 ; CHECK-SSE1-NEXT: movl 4(%r9), %edx 2210 ; CHECK-SSE1-NEXT: movzwl 4(%r8), %eax 2211 ; CHECK-SSE1-NEXT: andw %dx, %ax 2212 ; CHECK-SSE1-NEXT: notl %edx 2213 ; CHECK-SSE1-NEXT: andw 4(%r10), %dx 2214 ; CHECK-SSE1-NEXT: orl %eax, %edx 2215 ; CHECK-SSE1-NEXT: movzwl 2(%r9), %eax 2216 ; CHECK-SSE1-NEXT: movzwl 2(%r8), %ecx 2217 ; CHECK-SSE1-NEXT: andw %ax, %cx 2218 ; CHECK-SSE1-NEXT: notl %eax 2219 ; CHECK-SSE1-NEXT: andw 2(%r10), %ax 2220 ; CHECK-SSE1-NEXT: orl %ecx, %eax 2221 ; CHECK-SSE1-NEXT: movl (%r9), %r9d 2222 ; CHECK-SSE1-NEXT: movzwl (%r8), %ecx 2223 ; CHECK-SSE1-NEXT: andw %r9w, %cx 2224 ; CHECK-SSE1-NEXT: notl %r9d 2225 ; CHECK-SSE1-NEXT: andw (%r10), %r9w 2226 ; CHECK-SSE1-NEXT: orl %ecx, %r9d 2227 ; CHECK-SSE1-NEXT: movw %bp, 30(%r11) 2228 ; CHECK-SSE1-NEXT: movw %bx, 28(%r11) 2229 ; CHECK-SSE1-NEXT: movw %r14w, 26(%r11) 2230 ; CHECK-SSE1-NEXT: movw %r12w, 24(%r11) 2231 ; CHECK-SSE1-NEXT: movw %r15w, 22(%r11) 2232 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 2233 ; CHECK-SSE1-NEXT: movw %cx, 20(%r11) 2234 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 2235 ; CHECK-SSE1-NEXT: movw %cx, 18(%r11) 2236 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 2237 ; CHECK-SSE1-NEXT: movw %cx, 16(%r11) 2238 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 2239 ; CHECK-SSE1-NEXT: movw %cx, 14(%r11) 2240 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 2241 ; CHECK-SSE1-NEXT: movw %cx, 12(%r11) 2242 ; CHECK-SSE1-NEXT: movw 
%r13w, 10(%r11) 2243 ; CHECK-SSE1-NEXT: movw %di, 8(%r11) 2244 ; CHECK-SSE1-NEXT: movw %si, 6(%r11) 2245 ; CHECK-SSE1-NEXT: movw %dx, 4(%r11) 2246 ; CHECK-SSE1-NEXT: movw %ax, 2(%r11) 2247 ; CHECK-SSE1-NEXT: movw %r9w, (%r11) 2248 ; CHECK-SSE1-NEXT: movq %r11, %rax 2249 ; CHECK-SSE1-NEXT: popq %rbx 2250 ; CHECK-SSE1-NEXT: popq %r12 2251 ; CHECK-SSE1-NEXT: popq %r13 2252 ; CHECK-SSE1-NEXT: popq %r14 2253 ; CHECK-SSE1-NEXT: popq %r15 2254 ; CHECK-SSE1-NEXT: popq %rbp 2255 ; CHECK-SSE1-NEXT: retq 2256 ; 2257 ; CHECK-SSE2-LABEL: out_v16i16: 2258 ; CHECK-SSE2: # %bb.0: 2259 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 2260 ; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 2261 ; CHECK-SSE2-NEXT: movaps 16(%rdi), %xmm2 2262 ; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 2263 ; CHECK-SSE2-NEXT: movaps (%rdi), %xmm3 2264 ; CHECK-SSE2-NEXT: andps %xmm0, %xmm3 2265 ; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm1 2266 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 2267 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 2268 ; CHECK-SSE2-NEXT: orps %xmm3, %xmm0 2269 ; CHECK-SSE2-NEXT: retq 2270 ; 2271 ; CHECK-XOP-LABEL: out_v16i16: 2272 ; CHECK-XOP: # %bb.0: 2273 ; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 2274 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 2275 ; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 2276 ; CHECK-XOP-NEXT: retq 2277 %x = load <16 x i16>, <16 x i16> *%px, align 32 2278 %y = load <16 x i16>, <16 x i16> *%py, align 32 2279 %mask = load <16 x i16>, <16 x i16> *%pmask, align 32 2280 %mx = and <16 x i16> %x, %mask 2281 %notmask = xor <16 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 2282 %my = and <16 x i16> %y, %notmask 2283 %r = or <16 x i16> %mx, %my 2284 ret <16 x i16> %r 2285 } 2286 2287 define <8 x i32> @out_v8i32(<8 x i32> *%px, <8 x i32> *%py, <8 x i32> *%pmask) nounwind { 2288 ; CHECK-BASELINE-LABEL: out_v8i32: 2289 ; CHECK-BASELINE: # %bb.0: 2290 ; CHECK-BASELINE-NEXT: pushq %rbp 2291 ; 
CHECK-BASELINE-NEXT: pushq %r15 2292 ; CHECK-BASELINE-NEXT: pushq %r14 2293 ; CHECK-BASELINE-NEXT: pushq %rbx 2294 ; CHECK-BASELINE-NEXT: movl 4(%rcx), %r8d 2295 ; CHECK-BASELINE-NEXT: movl 8(%rcx), %r9d 2296 ; CHECK-BASELINE-NEXT: movl 12(%rcx), %r10d 2297 ; CHECK-BASELINE-NEXT: movl 16(%rcx), %r11d 2298 ; CHECK-BASELINE-NEXT: movl 20(%rcx), %r15d 2299 ; CHECK-BASELINE-NEXT: movl 24(%rcx), %ebx 2300 ; CHECK-BASELINE-NEXT: movl 28(%rcx), %ebp 2301 ; CHECK-BASELINE-NEXT: movl 28(%rsi), %r14d 2302 ; CHECK-BASELINE-NEXT: andl %ebp, %r14d 2303 ; CHECK-BASELINE-NEXT: notl %ebp 2304 ; CHECK-BASELINE-NEXT: andl 28(%rdx), %ebp 2305 ; CHECK-BASELINE-NEXT: orl %r14d, %ebp 2306 ; CHECK-BASELINE-NEXT: movl 24(%rsi), %eax 2307 ; CHECK-BASELINE-NEXT: andl %ebx, %eax 2308 ; CHECK-BASELINE-NEXT: notl %ebx 2309 ; CHECK-BASELINE-NEXT: andl 24(%rdx), %ebx 2310 ; CHECK-BASELINE-NEXT: orl %eax, %ebx 2311 ; CHECK-BASELINE-NEXT: movl 20(%rsi), %eax 2312 ; CHECK-BASELINE-NEXT: andl %r15d, %eax 2313 ; CHECK-BASELINE-NEXT: notl %r15d 2314 ; CHECK-BASELINE-NEXT: andl 20(%rdx), %r15d 2315 ; CHECK-BASELINE-NEXT: orl %eax, %r15d 2316 ; CHECK-BASELINE-NEXT: movl 16(%rsi), %eax 2317 ; CHECK-BASELINE-NEXT: andl %r11d, %eax 2318 ; CHECK-BASELINE-NEXT: notl %r11d 2319 ; CHECK-BASELINE-NEXT: andl 16(%rdx), %r11d 2320 ; CHECK-BASELINE-NEXT: orl %eax, %r11d 2321 ; CHECK-BASELINE-NEXT: movl 12(%rsi), %eax 2322 ; CHECK-BASELINE-NEXT: andl %r10d, %eax 2323 ; CHECK-BASELINE-NEXT: notl %r10d 2324 ; CHECK-BASELINE-NEXT: andl 12(%rdx), %r10d 2325 ; CHECK-BASELINE-NEXT: orl %eax, %r10d 2326 ; CHECK-BASELINE-NEXT: movl 8(%rsi), %eax 2327 ; CHECK-BASELINE-NEXT: andl %r9d, %eax 2328 ; CHECK-BASELINE-NEXT: notl %r9d 2329 ; CHECK-BASELINE-NEXT: andl 8(%rdx), %r9d 2330 ; CHECK-BASELINE-NEXT: orl %eax, %r9d 2331 ; CHECK-BASELINE-NEXT: movl 4(%rsi), %eax 2332 ; CHECK-BASELINE-NEXT: andl %r8d, %eax 2333 ; CHECK-BASELINE-NEXT: notl %r8d 2334 ; CHECK-BASELINE-NEXT: andl 4(%rdx), %r8d 2335 ; CHECK-BASELINE-NEXT: orl %eax, 
%r8d 2336 ; CHECK-BASELINE-NEXT: movl (%rcx), %eax 2337 ; CHECK-BASELINE-NEXT: movl (%rsi), %ecx 2338 ; CHECK-BASELINE-NEXT: andl %eax, %ecx 2339 ; CHECK-BASELINE-NEXT: notl %eax 2340 ; CHECK-BASELINE-NEXT: andl (%rdx), %eax 2341 ; CHECK-BASELINE-NEXT: orl %ecx, %eax 2342 ; CHECK-BASELINE-NEXT: movl %ebp, 28(%rdi) 2343 ; CHECK-BASELINE-NEXT: movl %ebx, 24(%rdi) 2344 ; CHECK-BASELINE-NEXT: movl %r15d, 20(%rdi) 2345 ; CHECK-BASELINE-NEXT: movl %r11d, 16(%rdi) 2346 ; CHECK-BASELINE-NEXT: movl %r10d, 12(%rdi) 2347 ; CHECK-BASELINE-NEXT: movl %r9d, 8(%rdi) 2348 ; CHECK-BASELINE-NEXT: movl %r8d, 4(%rdi) 2349 ; CHECK-BASELINE-NEXT: movl %eax, (%rdi) 2350 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 2351 ; CHECK-BASELINE-NEXT: popq %rbx 2352 ; CHECK-BASELINE-NEXT: popq %r14 2353 ; CHECK-BASELINE-NEXT: popq %r15 2354 ; CHECK-BASELINE-NEXT: popq %rbp 2355 ; CHECK-BASELINE-NEXT: retq 2356 ; 2357 ; CHECK-SSE1-LABEL: out_v8i32: 2358 ; CHECK-SSE1: # %bb.0: 2359 ; CHECK-SSE1-NEXT: pushq %rbp 2360 ; CHECK-SSE1-NEXT: pushq %r15 2361 ; CHECK-SSE1-NEXT: pushq %r14 2362 ; CHECK-SSE1-NEXT: pushq %rbx 2363 ; CHECK-SSE1-NEXT: movl 4(%rcx), %r8d 2364 ; CHECK-SSE1-NEXT: movl 8(%rcx), %r9d 2365 ; CHECK-SSE1-NEXT: movl 12(%rcx), %r10d 2366 ; CHECK-SSE1-NEXT: movl 16(%rcx), %r11d 2367 ; CHECK-SSE1-NEXT: movl 20(%rcx), %r15d 2368 ; CHECK-SSE1-NEXT: movl 24(%rcx), %ebx 2369 ; CHECK-SSE1-NEXT: movl 28(%rcx), %ebp 2370 ; CHECK-SSE1-NEXT: movl 28(%rsi), %r14d 2371 ; CHECK-SSE1-NEXT: andl %ebp, %r14d 2372 ; CHECK-SSE1-NEXT: notl %ebp 2373 ; CHECK-SSE1-NEXT: andl 28(%rdx), %ebp 2374 ; CHECK-SSE1-NEXT: orl %r14d, %ebp 2375 ; CHECK-SSE1-NEXT: movl 24(%rsi), %eax 2376 ; CHECK-SSE1-NEXT: andl %ebx, %eax 2377 ; CHECK-SSE1-NEXT: notl %ebx 2378 ; CHECK-SSE1-NEXT: andl 24(%rdx), %ebx 2379 ; CHECK-SSE1-NEXT: orl %eax, %ebx 2380 ; CHECK-SSE1-NEXT: movl 20(%rsi), %eax 2381 ; CHECK-SSE1-NEXT: andl %r15d, %eax 2382 ; CHECK-SSE1-NEXT: notl %r15d 2383 ; CHECK-SSE1-NEXT: andl 20(%rdx), %r15d 2384 ; CHECK-SSE1-NEXT: orl 
%eax, %r15d 2385 ; CHECK-SSE1-NEXT: movl 16(%rsi), %eax 2386 ; CHECK-SSE1-NEXT: andl %r11d, %eax 2387 ; CHECK-SSE1-NEXT: notl %r11d 2388 ; CHECK-SSE1-NEXT: andl 16(%rdx), %r11d 2389 ; CHECK-SSE1-NEXT: orl %eax, %r11d 2390 ; CHECK-SSE1-NEXT: movl 12(%rsi), %eax 2391 ; CHECK-SSE1-NEXT: andl %r10d, %eax 2392 ; CHECK-SSE1-NEXT: notl %r10d 2393 ; CHECK-SSE1-NEXT: andl 12(%rdx), %r10d 2394 ; CHECK-SSE1-NEXT: orl %eax, %r10d 2395 ; CHECK-SSE1-NEXT: movl 8(%rsi), %eax 2396 ; CHECK-SSE1-NEXT: andl %r9d, %eax 2397 ; CHECK-SSE1-NEXT: notl %r9d 2398 ; CHECK-SSE1-NEXT: andl 8(%rdx), %r9d 2399 ; CHECK-SSE1-NEXT: orl %eax, %r9d 2400 ; CHECK-SSE1-NEXT: movl 4(%rsi), %eax 2401 ; CHECK-SSE1-NEXT: andl %r8d, %eax 2402 ; CHECK-SSE1-NEXT: notl %r8d 2403 ; CHECK-SSE1-NEXT: andl 4(%rdx), %r8d 2404 ; CHECK-SSE1-NEXT: orl %eax, %r8d 2405 ; CHECK-SSE1-NEXT: movl (%rcx), %eax 2406 ; CHECK-SSE1-NEXT: movl (%rsi), %ecx 2407 ; CHECK-SSE1-NEXT: andl %eax, %ecx 2408 ; CHECK-SSE1-NEXT: notl %eax 2409 ; CHECK-SSE1-NEXT: andl (%rdx), %eax 2410 ; CHECK-SSE1-NEXT: orl %ecx, %eax 2411 ; CHECK-SSE1-NEXT: movl %ebp, 28(%rdi) 2412 ; CHECK-SSE1-NEXT: movl %ebx, 24(%rdi) 2413 ; CHECK-SSE1-NEXT: movl %r15d, 20(%rdi) 2414 ; CHECK-SSE1-NEXT: movl %r11d, 16(%rdi) 2415 ; CHECK-SSE1-NEXT: movl %r10d, 12(%rdi) 2416 ; CHECK-SSE1-NEXT: movl %r9d, 8(%rdi) 2417 ; CHECK-SSE1-NEXT: movl %r8d, 4(%rdi) 2418 ; CHECK-SSE1-NEXT: movl %eax, (%rdi) 2419 ; CHECK-SSE1-NEXT: movq %rdi, %rax 2420 ; CHECK-SSE1-NEXT: popq %rbx 2421 ; CHECK-SSE1-NEXT: popq %r14 2422 ; CHECK-SSE1-NEXT: popq %r15 2423 ; CHECK-SSE1-NEXT: popq %rbp 2424 ; CHECK-SSE1-NEXT: retq 2425 ; 2426 ; CHECK-SSE2-LABEL: out_v8i32: 2427 ; CHECK-SSE2: # %bb.0: 2428 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 2429 ; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 2430 ; CHECK-SSE2-NEXT: movaps 16(%rdi), %xmm2 2431 ; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 2432 ; CHECK-SSE2-NEXT: movaps (%rdi), %xmm3 2433 ; CHECK-SSE2-NEXT: andps %xmm0, %xmm3 2434 ; CHECK-SSE2-NEXT: andnps 16(%rsi), 
%xmm1 2435 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 2436 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 2437 ; CHECK-SSE2-NEXT: orps %xmm3, %xmm0 2438 ; CHECK-SSE2-NEXT: retq 2439 ; 2440 ; CHECK-XOP-LABEL: out_v8i32: 2441 ; CHECK-XOP: # %bb.0: 2442 ; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 2443 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 2444 ; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 2445 ; CHECK-XOP-NEXT: retq 2446 %x = load <8 x i32>, <8 x i32> *%px, align 32 2447 %y = load <8 x i32>, <8 x i32> *%py, align 32 2448 %mask = load <8 x i32>, <8 x i32> *%pmask, align 32 2449 %mx = and <8 x i32> %x, %mask 2450 %notmask = xor <8 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 2451 %my = and <8 x i32> %y, %notmask 2452 %r = or <8 x i32> %mx, %my 2453 ret <8 x i32> %r 2454 } 2455 2456 define <4 x i64> @out_v4i64(<4 x i64> *%px, <4 x i64> *%py, <4 x i64> *%pmask) nounwind { 2457 ; CHECK-BASELINE-LABEL: out_v4i64: 2458 ; CHECK-BASELINE: # %bb.0: 2459 ; CHECK-BASELINE-NEXT: pushq %rbx 2460 ; CHECK-BASELINE-NEXT: movq (%rcx), %r8 2461 ; CHECK-BASELINE-NEXT: movq 8(%rcx), %r9 2462 ; CHECK-BASELINE-NEXT: movq 16(%rcx), %rax 2463 ; CHECK-BASELINE-NEXT: movq 24(%rcx), %rcx 2464 ; CHECK-BASELINE-NEXT: movq 24(%rsi), %r10 2465 ; CHECK-BASELINE-NEXT: andq %rcx, %r10 2466 ; CHECK-BASELINE-NEXT: movq 16(%rsi), %r11 2467 ; CHECK-BASELINE-NEXT: andq %rax, %r11 2468 ; CHECK-BASELINE-NEXT: movq 8(%rsi), %rbx 2469 ; CHECK-BASELINE-NEXT: andq %r9, %rbx 2470 ; CHECK-BASELINE-NEXT: movq (%rsi), %rsi 2471 ; CHECK-BASELINE-NEXT: andq %r8, %rsi 2472 ; CHECK-BASELINE-NEXT: notq %r8 2473 ; CHECK-BASELINE-NEXT: notq %r9 2474 ; CHECK-BASELINE-NEXT: notq %rax 2475 ; CHECK-BASELINE-NEXT: notq %rcx 2476 ; CHECK-BASELINE-NEXT: andq 24(%rdx), %rcx 2477 ; CHECK-BASELINE-NEXT: orq %r10, %rcx 2478 ; CHECK-BASELINE-NEXT: andq 16(%rdx), %rax 2479 ; CHECK-BASELINE-NEXT: orq %r11, %rax 2480 ; CHECK-BASELINE-NEXT: andq 8(%rdx), %r9 2481 ; CHECK-BASELINE-NEXT: orq %rbx, %r9 2482 ; 
CHECK-BASELINE-NEXT: andq (%rdx), %r8 2483 ; CHECK-BASELINE-NEXT: orq %rsi, %r8 2484 ; CHECK-BASELINE-NEXT: movq %rcx, 24(%rdi) 2485 ; CHECK-BASELINE-NEXT: movq %rax, 16(%rdi) 2486 ; CHECK-BASELINE-NEXT: movq %r9, 8(%rdi) 2487 ; CHECK-BASELINE-NEXT: movq %r8, (%rdi) 2488 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 2489 ; CHECK-BASELINE-NEXT: popq %rbx 2490 ; CHECK-BASELINE-NEXT: retq 2491 ; 2492 ; CHECK-SSE1-LABEL: out_v4i64: 2493 ; CHECK-SSE1: # %bb.0: 2494 ; CHECK-SSE1-NEXT: pushq %rbx 2495 ; CHECK-SSE1-NEXT: movq (%rcx), %r8 2496 ; CHECK-SSE1-NEXT: movq 8(%rcx), %r9 2497 ; CHECK-SSE1-NEXT: movq 16(%rcx), %rax 2498 ; CHECK-SSE1-NEXT: movq 24(%rcx), %rcx 2499 ; CHECK-SSE1-NEXT: movq 24(%rsi), %r10 2500 ; CHECK-SSE1-NEXT: andq %rcx, %r10 2501 ; CHECK-SSE1-NEXT: movq 16(%rsi), %r11 2502 ; CHECK-SSE1-NEXT: andq %rax, %r11 2503 ; CHECK-SSE1-NEXT: movq 8(%rsi), %rbx 2504 ; CHECK-SSE1-NEXT: andq %r9, %rbx 2505 ; CHECK-SSE1-NEXT: movq (%rsi), %rsi 2506 ; CHECK-SSE1-NEXT: andq %r8, %rsi 2507 ; CHECK-SSE1-NEXT: notq %r8 2508 ; CHECK-SSE1-NEXT: notq %r9 2509 ; CHECK-SSE1-NEXT: notq %rax 2510 ; CHECK-SSE1-NEXT: notq %rcx 2511 ; CHECK-SSE1-NEXT: andq 24(%rdx), %rcx 2512 ; CHECK-SSE1-NEXT: orq %r10, %rcx 2513 ; CHECK-SSE1-NEXT: andq 16(%rdx), %rax 2514 ; CHECK-SSE1-NEXT: orq %r11, %rax 2515 ; CHECK-SSE1-NEXT: andq 8(%rdx), %r9 2516 ; CHECK-SSE1-NEXT: orq %rbx, %r9 2517 ; CHECK-SSE1-NEXT: andq (%rdx), %r8 2518 ; CHECK-SSE1-NEXT: orq %rsi, %r8 2519 ; CHECK-SSE1-NEXT: movq %rcx, 24(%rdi) 2520 ; CHECK-SSE1-NEXT: movq %rax, 16(%rdi) 2521 ; CHECK-SSE1-NEXT: movq %r9, 8(%rdi) 2522 ; CHECK-SSE1-NEXT: movq %r8, (%rdi) 2523 ; CHECK-SSE1-NEXT: movq %rdi, %rax 2524 ; CHECK-SSE1-NEXT: popq %rbx 2525 ; CHECK-SSE1-NEXT: retq 2526 ; 2527 ; CHECK-SSE2-LABEL: out_v4i64: 2528 ; CHECK-SSE2: # %bb.0: 2529 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 2530 ; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 2531 ; CHECK-SSE2-NEXT: movaps 16(%rdi), %xmm2 2532 ; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 2533 ; CHECK-SSE2-NEXT: 
movaps (%rdi), %xmm3 2534 ; CHECK-SSE2-NEXT: andps %xmm0, %xmm3 2535 ; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm1 2536 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 2537 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 2538 ; CHECK-SSE2-NEXT: orps %xmm3, %xmm0 2539 ; CHECK-SSE2-NEXT: retq 2540 ; 2541 ; CHECK-XOP-LABEL: out_v4i64: 2542 ; CHECK-XOP: # %bb.0: 2543 ; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 2544 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 2545 ; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 2546 ; CHECK-XOP-NEXT: retq 2547 %x = load <4 x i64>, <4 x i64> *%px, align 32 2548 %y = load <4 x i64>, <4 x i64> *%py, align 32 2549 %mask = load <4 x i64>, <4 x i64> *%pmask, align 32 2550 %mx = and <4 x i64> %x, %mask 2551 %notmask = xor <4 x i64> %mask, <i64 -1, i64 -1, i64 -1, i64 -1> 2552 %my = and <4 x i64> %y, %notmask 2553 %r = or <4 x i64> %mx, %my 2554 ret <4 x i64> %r 2555 } 2556 2557 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2558 ; The in_* tests below (masked merge written as xor/and/xor) should be the same as the out_* tests above (and/not-and/or). 
2559 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2560 2561 ; ============================================================================ ; 2562 ; 8-bit vector width 2563 ; ============================================================================ ; 2564 2565 define <1 x i8> @in_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind { 2566 ; CHECK-LABEL: in_v1i8: 2567 ; CHECK: # %bb.0: 2568 ; CHECK-NEXT: xorl %esi, %edi 2569 ; CHECK-NEXT: andl %edx, %edi 2570 ; CHECK-NEXT: xorl %esi, %edi 2571 ; CHECK-NEXT: movl %edi, %eax 2572 ; CHECK-NEXT: retq 2573 %n0 = xor <1 x i8> %x, %y 2574 %n1 = and <1 x i8> %n0, %mask 2575 %r = xor <1 x i8> %n1, %y 2576 ret <1 x i8> %r 2577 } 2578 2579 ; ============================================================================ ; 2580 ; 16-bit vector width 2581 ; ============================================================================ ; 2582 2583 define <2 x i8> @in_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind { 2584 ; CHECK-BASELINE-LABEL: in_v2i8: 2585 ; CHECK-BASELINE: # %bb.0: 2586 ; CHECK-BASELINE-NEXT: xorl %ecx, %esi 2587 ; CHECK-BASELINE-NEXT: xorl %edx, %edi 2588 ; CHECK-BASELINE-NEXT: andl %r8d, %edi 2589 ; CHECK-BASELINE-NEXT: andl %r9d, %esi 2590 ; CHECK-BASELINE-NEXT: xorl %ecx, %esi 2591 ; CHECK-BASELINE-NEXT: xorl %edx, %edi 2592 ; CHECK-BASELINE-NEXT: movl %edi, %eax 2593 ; CHECK-BASELINE-NEXT: movl %esi, %edx 2594 ; CHECK-BASELINE-NEXT: retq 2595 ; 2596 ; CHECK-SSE1-LABEL: in_v2i8: 2597 ; CHECK-SSE1: # %bb.0: 2598 ; CHECK-SSE1-NEXT: xorl %ecx, %esi 2599 ; CHECK-SSE1-NEXT: xorl %edx, %edi 2600 ; CHECK-SSE1-NEXT: andl %r8d, %edi 2601 ; CHECK-SSE1-NEXT: andl %r9d, %esi 2602 ; CHECK-SSE1-NEXT: xorl %ecx, %esi 2603 ; CHECK-SSE1-NEXT: xorl %edx, %edi 2604 ; CHECK-SSE1-NEXT: movl %edi, %eax 2605 ; CHECK-SSE1-NEXT: movl %esi, %edx 2606 ; CHECK-SSE1-NEXT: retq 2607 ; 2608 ; CHECK-SSE2-LABEL: in_v2i8: 2609 ; CHECK-SSE2: # %bb.0: 2610 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 2611 
; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 2612 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 2613 ; CHECK-SSE2-NEXT: retq 2614 ; 2615 ; CHECK-XOP-LABEL: in_v2i8: 2616 ; CHECK-XOP: # %bb.0: 2617 ; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 2618 ; CHECK-XOP-NEXT: retq 2619 %n0 = xor <2 x i8> %x, %y 2620 %n1 = and <2 x i8> %n0, %mask 2621 %r = xor <2 x i8> %n1, %y 2622 ret <2 x i8> %r 2623 } 2624 2625 define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind { 2626 ; CHECK-LABEL: in_v1i16: 2627 ; CHECK: # %bb.0: 2628 ; CHECK-NEXT: xorl %esi, %edi 2629 ; CHECK-NEXT: andl %edx, %edi 2630 ; CHECK-NEXT: xorl %esi, %edi 2631 ; CHECK-NEXT: movl %edi, %eax 2632 ; CHECK-NEXT: retq 2633 %n0 = xor <1 x i16> %x, %y 2634 %n1 = and <1 x i16> %n0, %mask 2635 %r = xor <1 x i16> %n1, %y 2636 ret <1 x i16> %r 2637 } 2638 2639 ; ============================================================================ ; 2640 ; 32-bit vector width 2641 ; ============================================================================ ; 2642 2643 define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { 2644 ; CHECK-BASELINE-LABEL: in_v4i8: 2645 ; CHECK-BASELINE: # %bb.0: 2646 ; CHECK-BASELINE-NEXT: xorl %r9d, %esi 2647 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b 2648 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b 2649 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 2650 ; CHECK-BASELINE-NEXT: xorb %al, %dl 2651 ; CHECK-BASELINE-NEXT: xorb %r11b, %cl 2652 ; CHECK-BASELINE-NEXT: xorb %r10b, %r8b 2653 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b 2654 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 2655 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl 2656 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil 2657 ; CHECK-BASELINE-NEXT: xorb %r9b, %sil 2658 ; CHECK-BASELINE-NEXT: xorb %al, %dl 2659 ; CHECK-BASELINE-NEXT: xorb %r11b, %cl 2660 ; CHECK-BASELINE-NEXT: xorb %r10b, %r8b 2661 ; CHECK-BASELINE-NEXT: movb %r8b, 3(%rdi) 2662 ; 
CHECK-BASELINE-NEXT: movb %cl, 2(%rdi) 2663 ; CHECK-BASELINE-NEXT: movb %dl, 1(%rdi) 2664 ; CHECK-BASELINE-NEXT: movb %sil, (%rdi) 2665 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 2666 ; CHECK-BASELINE-NEXT: retq 2667 ; 2668 ; CHECK-SSE1-LABEL: in_v4i8: 2669 ; CHECK-SSE1: # %bb.0: 2670 ; CHECK-SSE1-NEXT: xorl %r9d, %esi 2671 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b 2672 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b 2673 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 2674 ; CHECK-SSE1-NEXT: xorb %al, %dl 2675 ; CHECK-SSE1-NEXT: xorb %r11b, %cl 2676 ; CHECK-SSE1-NEXT: xorb %r10b, %r8b 2677 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b 2678 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 2679 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl 2680 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil 2681 ; CHECK-SSE1-NEXT: xorb %r9b, %sil 2682 ; CHECK-SSE1-NEXT: xorb %al, %dl 2683 ; CHECK-SSE1-NEXT: xorb %r11b, %cl 2684 ; CHECK-SSE1-NEXT: xorb %r10b, %r8b 2685 ; CHECK-SSE1-NEXT: movb %r8b, 3(%rdi) 2686 ; CHECK-SSE1-NEXT: movb %cl, 2(%rdi) 2687 ; CHECK-SSE1-NEXT: movb %dl, 1(%rdi) 2688 ; CHECK-SSE1-NEXT: movb %sil, (%rdi) 2689 ; CHECK-SSE1-NEXT: movq %rdi, %rax 2690 ; CHECK-SSE1-NEXT: retq 2691 ; 2692 ; CHECK-SSE2-LABEL: in_v4i8: 2693 ; CHECK-SSE2: # %bb.0: 2694 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 2695 ; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 2696 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 2697 ; CHECK-SSE2-NEXT: retq 2698 ; 2699 ; CHECK-XOP-LABEL: in_v4i8: 2700 ; CHECK-XOP: # %bb.0: 2701 ; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 2702 ; CHECK-XOP-NEXT: retq 2703 %n0 = xor <4 x i8> %x, %y 2704 %n1 = and <4 x i8> %n0, %mask 2705 %r = xor <4 x i8> %n1, %y 2706 ret <4 x i8> %r 2707 } 2708 2709 define <2 x i16> @in_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind { 2710 ; CHECK-BASELINE-LABEL: in_v2i16: 2711 ; CHECK-BASELINE: # %bb.0: 2712 ; CHECK-BASELINE-NEXT: xorl %ecx, %esi 2713 ; CHECK-BASELINE-NEXT: xorl %edx, %edi 2714 ; CHECK-BASELINE-NEXT: andl %r8d, %edi 
2715 ; CHECK-BASELINE-NEXT: andl %r9d, %esi 2716 ; CHECK-BASELINE-NEXT: xorl %ecx, %esi 2717 ; CHECK-BASELINE-NEXT: xorl %edx, %edi 2718 ; CHECK-BASELINE-NEXT: movl %edi, %eax 2719 ; CHECK-BASELINE-NEXT: movl %esi, %edx 2720 ; CHECK-BASELINE-NEXT: retq 2721 ; 2722 ; CHECK-SSE1-LABEL: in_v2i16: 2723 ; CHECK-SSE1: # %bb.0: 2724 ; CHECK-SSE1-NEXT: xorl %ecx, %esi 2725 ; CHECK-SSE1-NEXT: xorl %edx, %edi 2726 ; CHECK-SSE1-NEXT: andl %r8d, %edi 2727 ; CHECK-SSE1-NEXT: andl %r9d, %esi 2728 ; CHECK-SSE1-NEXT: xorl %ecx, %esi 2729 ; CHECK-SSE1-NEXT: xorl %edx, %edi 2730 ; CHECK-SSE1-NEXT: movl %edi, %eax 2731 ; CHECK-SSE1-NEXT: movl %esi, %edx 2732 ; CHECK-SSE1-NEXT: retq 2733 ; 2734 ; CHECK-SSE2-LABEL: in_v2i16: 2735 ; CHECK-SSE2: # %bb.0: 2736 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 2737 ; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 2738 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 2739 ; CHECK-SSE2-NEXT: retq 2740 ; 2741 ; CHECK-XOP-LABEL: in_v2i16: 2742 ; CHECK-XOP: # %bb.0: 2743 ; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 2744 ; CHECK-XOP-NEXT: retq 2745 %n0 = xor <2 x i16> %x, %y 2746 %n1 = and <2 x i16> %n0, %mask 2747 %r = xor <2 x i16> %n1, %y 2748 ret <2 x i16> %r 2749 } 2750 2751 define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind { 2752 ; CHECK-LABEL: in_v1i32: 2753 ; CHECK: # %bb.0: 2754 ; CHECK-NEXT: xorl %esi, %edi 2755 ; CHECK-NEXT: andl %edx, %edi 2756 ; CHECK-NEXT: xorl %esi, %edi 2757 ; CHECK-NEXT: movl %edi, %eax 2758 ; CHECK-NEXT: retq 2759 %n0 = xor <1 x i32> %x, %y 2760 %n1 = and <1 x i32> %n0, %mask 2761 %r = xor <1 x i32> %n1, %y 2762 ret <1 x i32> %r 2763 } 2764 2765 ; ============================================================================ ; 2766 ; 64-bit vector width 2767 ; ============================================================================ ; 2768 2769 define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { 2770 ; CHECK-BASELINE-LABEL: in_v8i8: 2771 ; CHECK-BASELINE: # %bb.0: 2772 ; 
CHECK-BASELINE-NEXT: pushq %rbp 2773 ; CHECK-BASELINE-NEXT: pushq %r15 2774 ; CHECK-BASELINE-NEXT: pushq %r14 2775 ; CHECK-BASELINE-NEXT: pushq %r13 2776 ; CHECK-BASELINE-NEXT: pushq %r12 2777 ; CHECK-BASELINE-NEXT: pushq %rbx 2778 ; CHECK-BASELINE-NEXT: movl %ecx, %r10d 2779 ; CHECK-BASELINE-NEXT: movl %edx, %r11d 2780 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl 2781 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b 2782 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b 2783 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b 2784 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b 2785 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl 2786 ; CHECK-BASELINE-NEXT: xorb %bpl, %sil 2787 ; CHECK-BASELINE-NEXT: xorb %r13b, %r11b 2788 ; CHECK-BASELINE-NEXT: xorb %r12b, %r10b 2789 ; CHECK-BASELINE-NEXT: xorb %r15b, %r8b 2790 ; CHECK-BASELINE-NEXT: xorb %r14b, %r9b 2791 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl 2792 ; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %dl 2793 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl 2794 ; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %cl 2795 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 2796 ; CHECK-BASELINE-NEXT: xorb %bl, %al 2797 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b 2798 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b 2799 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b 2800 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b 2801 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil 2802 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al 2803 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 2804 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl 2805 ; CHECK-BASELINE-NEXT: xorb %bpl, %sil 2806 ; CHECK-BASELINE-NEXT: xorb %r13b, %r11b 2807 ; CHECK-BASELINE-NEXT: xorb %r12b, %r10b 2808 ; CHECK-BASELINE-NEXT: xorb %r15b, %r8b 2809 ; CHECK-BASELINE-NEXT: xorb %r14b, %r9b 2810 ; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %dl 2811 ; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %cl 2812 ; 
CHECK-BASELINE-NEXT: xorb %bl, %al 2813 ; CHECK-BASELINE-NEXT: movb %al, 7(%rdi) 2814 ; CHECK-BASELINE-NEXT: movb %cl, 6(%rdi) 2815 ; CHECK-BASELINE-NEXT: movb %dl, 5(%rdi) 2816 ; CHECK-BASELINE-NEXT: movb %r9b, 4(%rdi) 2817 ; CHECK-BASELINE-NEXT: movb %r8b, 3(%rdi) 2818 ; CHECK-BASELINE-NEXT: movb %r10b, 2(%rdi) 2819 ; CHECK-BASELINE-NEXT: movb %r11b, 1(%rdi) 2820 ; CHECK-BASELINE-NEXT: movb %sil, (%rdi) 2821 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 2822 ; CHECK-BASELINE-NEXT: popq %rbx 2823 ; CHECK-BASELINE-NEXT: popq %r12 2824 ; CHECK-BASELINE-NEXT: popq %r13 2825 ; CHECK-BASELINE-NEXT: popq %r14 2826 ; CHECK-BASELINE-NEXT: popq %r15 2827 ; CHECK-BASELINE-NEXT: popq %rbp 2828 ; CHECK-BASELINE-NEXT: retq 2829 ; 2830 ; CHECK-SSE1-LABEL: in_v8i8: 2831 ; CHECK-SSE1: # %bb.0: 2832 ; CHECK-SSE1-NEXT: pushq %rbp 2833 ; CHECK-SSE1-NEXT: pushq %r15 2834 ; CHECK-SSE1-NEXT: pushq %r14 2835 ; CHECK-SSE1-NEXT: pushq %r13 2836 ; CHECK-SSE1-NEXT: pushq %r12 2837 ; CHECK-SSE1-NEXT: pushq %rbx 2838 ; CHECK-SSE1-NEXT: movl %ecx, %r10d 2839 ; CHECK-SSE1-NEXT: movl %edx, %r11d 2840 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl 2841 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b 2842 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b 2843 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b 2844 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b 2845 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl 2846 ; CHECK-SSE1-NEXT: xorb %bpl, %sil 2847 ; CHECK-SSE1-NEXT: xorb %r13b, %r11b 2848 ; CHECK-SSE1-NEXT: xorb %r12b, %r10b 2849 ; CHECK-SSE1-NEXT: xorb %r15b, %r8b 2850 ; CHECK-SSE1-NEXT: xorb %r14b, %r9b 2851 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl 2852 ; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %dl 2853 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl 2854 ; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %cl 2855 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 2856 ; CHECK-SSE1-NEXT: xorb %bl, %al 2857 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b 2858 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b 2859 ; 
CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b 2860 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b 2861 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil 2862 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al 2863 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 2864 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl 2865 ; CHECK-SSE1-NEXT: xorb %bpl, %sil 2866 ; CHECK-SSE1-NEXT: xorb %r13b, %r11b 2867 ; CHECK-SSE1-NEXT: xorb %r12b, %r10b 2868 ; CHECK-SSE1-NEXT: xorb %r15b, %r8b 2869 ; CHECK-SSE1-NEXT: xorb %r14b, %r9b 2870 ; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %dl 2871 ; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %cl 2872 ; CHECK-SSE1-NEXT: xorb %bl, %al 2873 ; CHECK-SSE1-NEXT: movb %al, 7(%rdi) 2874 ; CHECK-SSE1-NEXT: movb %cl, 6(%rdi) 2875 ; CHECK-SSE1-NEXT: movb %dl, 5(%rdi) 2876 ; CHECK-SSE1-NEXT: movb %r9b, 4(%rdi) 2877 ; CHECK-SSE1-NEXT: movb %r8b, 3(%rdi) 2878 ; CHECK-SSE1-NEXT: movb %r10b, 2(%rdi) 2879 ; CHECK-SSE1-NEXT: movb %r11b, 1(%rdi) 2880 ; CHECK-SSE1-NEXT: movb %sil, (%rdi) 2881 ; CHECK-SSE1-NEXT: movq %rdi, %rax 2882 ; CHECK-SSE1-NEXT: popq %rbx 2883 ; CHECK-SSE1-NEXT: popq %r12 2884 ; CHECK-SSE1-NEXT: popq %r13 2885 ; CHECK-SSE1-NEXT: popq %r14 2886 ; CHECK-SSE1-NEXT: popq %r15 2887 ; CHECK-SSE1-NEXT: popq %rbp 2888 ; CHECK-SSE1-NEXT: retq 2889 ; 2890 ; CHECK-SSE2-LABEL: in_v8i8: 2891 ; CHECK-SSE2: # %bb.0: 2892 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 2893 ; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 2894 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 2895 ; CHECK-SSE2-NEXT: retq 2896 ; 2897 ; CHECK-XOP-LABEL: in_v8i8: 2898 ; CHECK-XOP: # %bb.0: 2899 ; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 2900 ; CHECK-XOP-NEXT: retq 2901 %n0 = xor <8 x i8> %x, %y 2902 %n1 = and <8 x i8> %n0, %mask 2903 %r = xor <8 x i8> %n1, %y 2904 ret <8 x i8> %r 2905 } 2906 2907 define <4 x i16> @in_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind { 2908 ; CHECK-BASELINE-LABEL: in_v4i16: 2909 ; CHECK-BASELINE: # %bb.0: 2910 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d 2911 ; 
CHECK-BASELINE-NEXT: xorl %r10d, %r8d 2912 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d 2913 ; CHECK-BASELINE-NEXT: xorl %r11d, %ecx 2914 ; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %eax 2915 ; CHECK-BASELINE-NEXT: xorl %eax, %edx 2916 ; CHECK-BASELINE-NEXT: xorl %r9d, %esi 2917 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r8w 2918 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx 2919 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %dx 2920 ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si 2921 ; CHECK-BASELINE-NEXT: xorl %r9d, %esi 2922 ; CHECK-BASELINE-NEXT: xorl %eax, %edx 2923 ; CHECK-BASELINE-NEXT: xorl %r11d, %ecx 2924 ; CHECK-BASELINE-NEXT: xorl %r10d, %r8d 2925 ; CHECK-BASELINE-NEXT: movw %r8w, 6(%rdi) 2926 ; CHECK-BASELINE-NEXT: movw %cx, 4(%rdi) 2927 ; CHECK-BASELINE-NEXT: movw %dx, 2(%rdi) 2928 ; CHECK-BASELINE-NEXT: movw %si, (%rdi) 2929 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 2930 ; CHECK-BASELINE-NEXT: retq 2931 ; 2932 ; CHECK-SSE1-LABEL: in_v4i16: 2933 ; CHECK-SSE1: # %bb.0: 2934 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d 2935 ; CHECK-SSE1-NEXT: xorl %r10d, %r8d 2936 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d 2937 ; CHECK-SSE1-NEXT: xorl %r11d, %ecx 2938 ; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %eax 2939 ; CHECK-SSE1-NEXT: xorl %eax, %edx 2940 ; CHECK-SSE1-NEXT: xorl %r9d, %esi 2941 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r8w 2942 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx 2943 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx 2944 ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si 2945 ; CHECK-SSE1-NEXT: xorl %r9d, %esi 2946 ; CHECK-SSE1-NEXT: xorl %eax, %edx 2947 ; CHECK-SSE1-NEXT: xorl %r11d, %ecx 2948 ; CHECK-SSE1-NEXT: xorl %r10d, %r8d 2949 ; CHECK-SSE1-NEXT: movw %r8w, 6(%rdi) 2950 ; CHECK-SSE1-NEXT: movw %cx, 4(%rdi) 2951 ; CHECK-SSE1-NEXT: movw %dx, 2(%rdi) 2952 ; CHECK-SSE1-NEXT: movw %si, (%rdi) 2953 ; CHECK-SSE1-NEXT: movq %rdi, %rax 2954 ; CHECK-SSE1-NEXT: retq 2955 ; 2956 ; CHECK-SSE2-LABEL: in_v4i16: 2957 ; CHECK-SSE2: 
# %bb.0: 2958 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 2959 ; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 2960 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 2961 ; CHECK-SSE2-NEXT: retq 2962 ; 2963 ; CHECK-XOP-LABEL: in_v4i16: 2964 ; CHECK-XOP: # %bb.0: 2965 ; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 2966 ; CHECK-XOP-NEXT: retq 2967 %n0 = xor <4 x i16> %x, %y 2968 %n1 = and <4 x i16> %n0, %mask 2969 %r = xor <4 x i16> %n1, %y 2970 ret <4 x i16> %r 2971 } 2972 2973 define <2 x i32> @in_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind { 2974 ; CHECK-BASELINE-LABEL: in_v2i32: 2975 ; CHECK-BASELINE: # %bb.0: 2976 ; CHECK-BASELINE-NEXT: xorl %edx, %edi 2977 ; CHECK-BASELINE-NEXT: xorl %ecx, %esi 2978 ; CHECK-BASELINE-NEXT: andl %r9d, %esi 2979 ; CHECK-BASELINE-NEXT: andl %r8d, %edi 2980 ; CHECK-BASELINE-NEXT: xorl %edx, %edi 2981 ; CHECK-BASELINE-NEXT: xorl %ecx, %esi 2982 ; CHECK-BASELINE-NEXT: movl %edi, %eax 2983 ; CHECK-BASELINE-NEXT: movl %esi, %edx 2984 ; CHECK-BASELINE-NEXT: retq 2985 ; 2986 ; CHECK-SSE1-LABEL: in_v2i32: 2987 ; CHECK-SSE1: # %bb.0: 2988 ; CHECK-SSE1-NEXT: xorl %edx, %edi 2989 ; CHECK-SSE1-NEXT: xorl %ecx, %esi 2990 ; CHECK-SSE1-NEXT: andl %r9d, %esi 2991 ; CHECK-SSE1-NEXT: andl %r8d, %edi 2992 ; CHECK-SSE1-NEXT: xorl %edx, %edi 2993 ; CHECK-SSE1-NEXT: xorl %ecx, %esi 2994 ; CHECK-SSE1-NEXT: movl %edi, %eax 2995 ; CHECK-SSE1-NEXT: movl %esi, %edx 2996 ; CHECK-SSE1-NEXT: retq 2997 ; 2998 ; CHECK-SSE2-LABEL: in_v2i32: 2999 ; CHECK-SSE2: # %bb.0: 3000 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 3001 ; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 3002 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 3003 ; CHECK-SSE2-NEXT: retq 3004 ; 3005 ; CHECK-XOP-LABEL: in_v2i32: 3006 ; CHECK-XOP: # %bb.0: 3007 ; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 3008 ; CHECK-XOP-NEXT: retq 3009 %n0 = xor <2 x i32> %x, %y 3010 %n1 = and <2 x i32> %n0, %mask 3011 %r = xor <2 x i32> %n1, %y 3012 ret <2 x i32> %r 3013 } 3014 3015 define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x 
i64> %mask) nounwind { 3016 ; CHECK-LABEL: in_v1i64: 3017 ; CHECK: # %bb.0: 3018 ; CHECK-NEXT: xorq %rsi, %rdi 3019 ; CHECK-NEXT: andq %rdx, %rdi 3020 ; CHECK-NEXT: xorq %rsi, %rdi 3021 ; CHECK-NEXT: movq %rdi, %rax 3022 ; CHECK-NEXT: retq 3023 %n0 = xor <1 x i64> %x, %y 3024 %n1 = and <1 x i64> %n0, %mask 3025 %r = xor <1 x i64> %n1, %y 3026 ret <1 x i64> %r 3027 } 3028 3029 ; ============================================================================ ; 3030 ; 128-bit vector width 3031 ; ============================================================================ ; 3032 3033 define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind { 3034 ; CHECK-BASELINE-LABEL: in_v16i8: 3035 ; CHECK-BASELINE: # %bb.0: 3036 ; CHECK-BASELINE-NEXT: pushq %rbp 3037 ; CHECK-BASELINE-NEXT: pushq %r15 3038 ; CHECK-BASELINE-NEXT: pushq %r14 3039 ; CHECK-BASELINE-NEXT: pushq %r13 3040 ; CHECK-BASELINE-NEXT: pushq %r12 3041 ; CHECK-BASELINE-NEXT: pushq %rbx 3042 ; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3043 ; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3044 ; CHECK-BASELINE-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3045 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl 3046 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 3047 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b 3048 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b 3049 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b 3050 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b 3051 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl 3052 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl 3053 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b 3054 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b 3055 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 3056 ; CHECK-BASELINE-NEXT: xorb %al, %r9b 3057 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b 3058 ; CHECK-BASELINE-NEXT: xorb %al, %r9b 3059 ; CHECK-BASELINE-NEXT: 
movb {{[0-9]+}}(%rsp), %dl 3060 ; CHECK-BASELINE-NEXT: xorb %r10b, %dl 3061 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl 3062 ; CHECK-BASELINE-NEXT: xorb %r10b, %dl 3063 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b 3064 ; CHECK-BASELINE-NEXT: xorb %r11b, %r10b 3065 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b 3066 ; CHECK-BASELINE-NEXT: xorb %r11b, %r10b 3067 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b 3068 ; CHECK-BASELINE-NEXT: xorb %bl, %r11b 3069 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b 3070 ; CHECK-BASELINE-NEXT: xorb %bl, %r11b 3071 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl 3072 ; CHECK-BASELINE-NEXT: xorb %bpl, %bl 3073 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl 3074 ; CHECK-BASELINE-NEXT: xorb %bpl, %bl 3075 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl 3076 ; CHECK-BASELINE-NEXT: xorb %r13b, %bpl 3077 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl 3078 ; CHECK-BASELINE-NEXT: xorb %r13b, %bpl 3079 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b 3080 ; CHECK-BASELINE-NEXT: xorb %r12b, %r13b 3081 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r13b 3082 ; CHECK-BASELINE-NEXT: xorb %r12b, %r13b 3083 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b 3084 ; CHECK-BASELINE-NEXT: xorb %r15b, %r12b 3085 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r12b 3086 ; CHECK-BASELINE-NEXT: xorb %r15b, %r12b 3087 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b 3088 ; CHECK-BASELINE-NEXT: xorb %r14b, %r15b 3089 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r15b 3090 ; CHECK-BASELINE-NEXT: xorb %r14b, %r15b 3091 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b 3092 ; CHECK-BASELINE-NEXT: xorb %sil, %r14b 3093 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r14b 3094 ; CHECK-BASELINE-NEXT: xorb %sil, %r14b 3095 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 3096 ; CHECK-BASELINE-NEXT: xorb %cl, %al 3097 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al 3098 ; CHECK-BASELINE-NEXT: xorb %cl, %al 3099 ; 
CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl 3100 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 3101 ; CHECK-BASELINE-NEXT: xorb %sil, %cl 3102 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 3103 ; CHECK-BASELINE-NEXT: xorb %sil, %cl 3104 ; CHECK-BASELINE-NEXT: movb %cl, 15(%rdi) 3105 ; CHECK-BASELINE-NEXT: movb %al, 14(%rdi) 3106 ; CHECK-BASELINE-NEXT: movb %r14b, 13(%rdi) 3107 ; CHECK-BASELINE-NEXT: movb %r15b, 12(%rdi) 3108 ; CHECK-BASELINE-NEXT: movb %r12b, 11(%rdi) 3109 ; CHECK-BASELINE-NEXT: movb %r13b, 10(%rdi) 3110 ; CHECK-BASELINE-NEXT: movb %bpl, 9(%rdi) 3111 ; CHECK-BASELINE-NEXT: movb %bl, 8(%rdi) 3112 ; CHECK-BASELINE-NEXT: movb %r11b, 7(%rdi) 3113 ; CHECK-BASELINE-NEXT: movb %r10b, 6(%rdi) 3114 ; CHECK-BASELINE-NEXT: movb %dl, 5(%rdi) 3115 ; CHECK-BASELINE-NEXT: movb %r9b, 4(%rdi) 3116 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 3117 ; CHECK-BASELINE-NEXT: xorb %al, %r8b 3118 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b 3119 ; CHECK-BASELINE-NEXT: xorb %al, %r8b 3120 ; CHECK-BASELINE-NEXT: movb %r8b, 3(%rdi) 3121 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 3122 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 3123 ; CHECK-BASELINE-NEXT: xorb %al, %cl 3124 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 3125 ; CHECK-BASELINE-NEXT: xorb %al, %cl 3126 ; CHECK-BASELINE-NEXT: movb %cl, 2(%rdi) 3127 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 3128 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 3129 ; CHECK-BASELINE-NEXT: xorb %al, %cl 3130 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 3131 ; CHECK-BASELINE-NEXT: xorb %al, %cl 3132 ; CHECK-BASELINE-NEXT: movb %cl, 1(%rdi) 3133 ; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 3134 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 3135 ; CHECK-BASELINE-NEXT: xorb %al, %cl 3136 ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 3137 ; CHECK-BASELINE-NEXT: xorb %al, %cl 3138 ; 
CHECK-BASELINE-NEXT: movb %cl, (%rdi) 3139 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 3140 ; CHECK-BASELINE-NEXT: popq %rbx 3141 ; CHECK-BASELINE-NEXT: popq %r12 3142 ; CHECK-BASELINE-NEXT: popq %r13 3143 ; CHECK-BASELINE-NEXT: popq %r14 3144 ; CHECK-BASELINE-NEXT: popq %r15 3145 ; CHECK-BASELINE-NEXT: popq %rbp 3146 ; CHECK-BASELINE-NEXT: retq 3147 ; 3148 ; CHECK-SSE1-LABEL: in_v16i8: 3149 ; CHECK-SSE1: # %bb.0: 3150 ; CHECK-SSE1-NEXT: pushq %rbp 3151 ; CHECK-SSE1-NEXT: pushq %r15 3152 ; CHECK-SSE1-NEXT: pushq %r14 3153 ; CHECK-SSE1-NEXT: pushq %r13 3154 ; CHECK-SSE1-NEXT: pushq %r12 3155 ; CHECK-SSE1-NEXT: pushq %rbx 3156 ; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3157 ; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3158 ; CHECK-SSE1-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3159 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl 3160 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 3161 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b 3162 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b 3163 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b 3164 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b 3165 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl 3166 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl 3167 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b 3168 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b 3169 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 3170 ; CHECK-SSE1-NEXT: xorb %al, %r9b 3171 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b 3172 ; CHECK-SSE1-NEXT: xorb %al, %r9b 3173 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl 3174 ; CHECK-SSE1-NEXT: xorb %r10b, %dl 3175 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl 3176 ; CHECK-SSE1-NEXT: xorb %r10b, %dl 3177 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b 3178 ; CHECK-SSE1-NEXT: xorb %r11b, %r10b 3179 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b 3180 ; CHECK-SSE1-NEXT: xorb %r11b, %r10b 3181 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b 3182 ; 
CHECK-SSE1-NEXT: xorb %bl, %r11b 3183 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b 3184 ; CHECK-SSE1-NEXT: xorb %bl, %r11b 3185 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl 3186 ; CHECK-SSE1-NEXT: xorb %bpl, %bl 3187 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl 3188 ; CHECK-SSE1-NEXT: xorb %bpl, %bl 3189 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl 3190 ; CHECK-SSE1-NEXT: xorb %r13b, %bpl 3191 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl 3192 ; CHECK-SSE1-NEXT: xorb %r13b, %bpl 3193 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b 3194 ; CHECK-SSE1-NEXT: xorb %r12b, %r13b 3195 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r13b 3196 ; CHECK-SSE1-NEXT: xorb %r12b, %r13b 3197 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b 3198 ; CHECK-SSE1-NEXT: xorb %r15b, %r12b 3199 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r12b 3200 ; CHECK-SSE1-NEXT: xorb %r15b, %r12b 3201 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b 3202 ; CHECK-SSE1-NEXT: xorb %r14b, %r15b 3203 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r15b 3204 ; CHECK-SSE1-NEXT: xorb %r14b, %r15b 3205 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b 3206 ; CHECK-SSE1-NEXT: xorb %sil, %r14b 3207 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r14b 3208 ; CHECK-SSE1-NEXT: xorb %sil, %r14b 3209 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 3210 ; CHECK-SSE1-NEXT: xorb %cl, %al 3211 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al 3212 ; CHECK-SSE1-NEXT: xorb %cl, %al 3213 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl 3214 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 3215 ; CHECK-SSE1-NEXT: xorb %sil, %cl 3216 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 3217 ; CHECK-SSE1-NEXT: xorb %sil, %cl 3218 ; CHECK-SSE1-NEXT: movb %cl, 15(%rdi) 3219 ; CHECK-SSE1-NEXT: movb %al, 14(%rdi) 3220 ; CHECK-SSE1-NEXT: movb %r14b, 13(%rdi) 3221 ; CHECK-SSE1-NEXT: movb %r15b, 12(%rdi) 3222 ; CHECK-SSE1-NEXT: movb %r12b, 11(%rdi) 3223 ; CHECK-SSE1-NEXT: movb %r13b, 10(%rdi) 3224 ; CHECK-SSE1-NEXT: movb %bpl, 9(%rdi) 3225 ; CHECK-SSE1-NEXT: movb 
%bl, 8(%rdi) 3226 ; CHECK-SSE1-NEXT: movb %r11b, 7(%rdi) 3227 ; CHECK-SSE1-NEXT: movb %r10b, 6(%rdi) 3228 ; CHECK-SSE1-NEXT: movb %dl, 5(%rdi) 3229 ; CHECK-SSE1-NEXT: movb %r9b, 4(%rdi) 3230 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 3231 ; CHECK-SSE1-NEXT: xorb %al, %r8b 3232 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b 3233 ; CHECK-SSE1-NEXT: xorb %al, %r8b 3234 ; CHECK-SSE1-NEXT: movb %r8b, 3(%rdi) 3235 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 3236 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 3237 ; CHECK-SSE1-NEXT: xorb %al, %cl 3238 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 3239 ; CHECK-SSE1-NEXT: xorb %al, %cl 3240 ; CHECK-SSE1-NEXT: movb %cl, 2(%rdi) 3241 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 3242 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 3243 ; CHECK-SSE1-NEXT: xorb %al, %cl 3244 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 3245 ; CHECK-SSE1-NEXT: xorb %al, %cl 3246 ; CHECK-SSE1-NEXT: movb %cl, 1(%rdi) 3247 ; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 3248 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 3249 ; CHECK-SSE1-NEXT: xorb %al, %cl 3250 ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 3251 ; CHECK-SSE1-NEXT: xorb %al, %cl 3252 ; CHECK-SSE1-NEXT: movb %cl, (%rdi) 3253 ; CHECK-SSE1-NEXT: movq %rdi, %rax 3254 ; CHECK-SSE1-NEXT: popq %rbx 3255 ; CHECK-SSE1-NEXT: popq %r12 3256 ; CHECK-SSE1-NEXT: popq %r13 3257 ; CHECK-SSE1-NEXT: popq %r14 3258 ; CHECK-SSE1-NEXT: popq %r15 3259 ; CHECK-SSE1-NEXT: popq %rbp 3260 ; CHECK-SSE1-NEXT: retq 3261 ; 3262 ; CHECK-SSE2-LABEL: in_v16i8: 3263 ; CHECK-SSE2: # %bb.0: 3264 ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 3265 ; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 3266 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 3267 ; CHECK-SSE2-NEXT: retq 3268 ; 3269 ; CHECK-XOP-LABEL: in_v16i8: 3270 ; CHECK-XOP: # %bb.0: 3271 ; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 3272 ; CHECK-XOP-NEXT: retq 3273 %n0 = xor <16 x i8> %x, %y 3274 %n1 = and 
<16 x i8> %n0, %mask
  %r = xor <16 x i8> %n1, %y
  ret <16 x i8> %r
}

; "In" form of the masked merge on <8 x i16>, all three operands passed by
; value. The body computes r = ((x ^ y) & mask) ^ y.
; Lowering pinned by the assertions below, one group per check prefix
;  - BASELINE and SSE1 - handled element by element in general-purpose
;    registers (an xor / andw / xor chain per 16-bit lane); the eight results
;    are stored through %rdi, which is then copied to %rax.
;  - SSE2 - andps + andnps + orps on whole xmm registers.
;  - XOP - a single vpcmov.
define <8 x i16> @in_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind {
; CHECK-BASELINE-LABEL: in_v8i16:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: pushq %rbp
; CHECK-BASELINE-NEXT: pushq %r14
; CHECK-BASELINE-NEXT: pushq %rbx
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT: xorl %r10d, %r9d
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT: xorl %r11d, %r8d
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: xorl %eax, %ecx
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorl %ebx, %esi
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si
; CHECK-BASELINE-NEXT: xorl %ebx, %esi
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorl %ebx, %edx
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %dx
; CHECK-BASELINE-NEXT: xorl %ebx, %edx
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r14d
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx
; CHECK-BASELINE-NEXT: xorl %eax, %ecx
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r8w
; CHECK-BASELINE-NEXT: xorl %r11d, %r8d
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r9w
; CHECK-BASELINE-NEXT: xorl %r10d, %r9d
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebp
; CHECK-BASELINE-NEXT: xorw %bx, %bp
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %bp
; CHECK-BASELINE-NEXT: xorl %ebx, %ebp
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorw %ax, %bx
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %bx
; CHECK-BASELINE-NEXT: xorl %eax, %ebx
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: xorw %r14w, %ax
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %ax
; CHECK-BASELINE-NEXT: xorl %r14d, %eax
; CHECK-BASELINE-NEXT: movw %ax, 14(%rdi)
; CHECK-BASELINE-NEXT: movw %bx, 12(%rdi)
; CHECK-BASELINE-NEXT: movw %bp, 10(%rdi)
; CHECK-BASELINE-NEXT: movw %r9w, 8(%rdi)
; CHECK-BASELINE-NEXT: movw %r8w, 6(%rdi)
; CHECK-BASELINE-NEXT: movw %cx, 4(%rdi)
; CHECK-BASELINE-NEXT: movw %dx, 2(%rdi)
; CHECK-BASELINE-NEXT: movw %si, (%rdi)
; CHECK-BASELINE-NEXT: movq %rdi, %rax
; CHECK-BASELINE-NEXT: popq %rbx
; CHECK-BASELINE-NEXT: popq %r14
; CHECK-BASELINE-NEXT: popq %rbp
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v8i16:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: pushq %rbp
; CHECK-SSE1-NEXT: pushq %r14
; CHECK-SSE1-NEXT: pushq %rbx
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT: xorl %r10d, %r9d
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT: xorl %r11d, %r8d
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: xorl %eax, %ecx
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorl %ebx, %esi
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si
; CHECK-SSE1-NEXT: xorl %ebx, %esi
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorl %ebx, %edx
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx
; CHECK-SSE1-NEXT: xorl %ebx, %edx
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r14d
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx
; CHECK-SSE1-NEXT: xorl %eax, %ecx
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r8w
; CHECK-SSE1-NEXT: xorl %r11d, %r8d
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r9w
; CHECK-SSE1-NEXT: xorl %r10d, %r9d
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebp
; CHECK-SSE1-NEXT: xorw %bx, %bp
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %bp
; CHECK-SSE1-NEXT: xorl %ebx, %ebp
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorw %ax, %bx
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %bx
; CHECK-SSE1-NEXT: xorl %eax, %ebx
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: xorw %r14w, %ax
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %ax
; CHECK-SSE1-NEXT: xorl %r14d, %eax
; CHECK-SSE1-NEXT: movw %ax, 14(%rdi)
; CHECK-SSE1-NEXT: movw %bx, 12(%rdi)
; CHECK-SSE1-NEXT: movw %bp, 10(%rdi)
; CHECK-SSE1-NEXT: movw %r9w, 8(%rdi)
; CHECK-SSE1-NEXT: movw %r8w, 6(%rdi)
; CHECK-SSE1-NEXT: movw %cx, 4(%rdi)
; CHECK-SSE1-NEXT: movw %dx, 2(%rdi)
; CHECK-SSE1-NEXT: movw %si, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: popq %rbx
; CHECK-SSE1-NEXT: popq %r14
; CHECK-SSE1-NEXT: popq %rbp
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v8i16:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: andps %xmm2, %xmm0
; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v8i16:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %n0 = xor <8 x i16> %x, %y
  %n1 = and <8 x i16> %n0, %mask
  %r = xor <8 x i16> %n1, %y
  ret <8 x i16> %r
}

; "In" form of the masked merge on <4 x i32>. Unlike its 128-bit siblings this
; variant takes pointers - x, y and mask are each loaded from memory with
; 16-byte alignment before r = ((x ^ y) & mask) ^ y is computed.
; Lowering pinned by the assertions below, one group per check prefix
;  - BASELINE - four 32-bit xor / andl / xor chains in general-purpose
;    registers, with the mask read directly from memory by the andl; the
;    results are stored through %rdi, which is then copied to %rax.
;  - SSE1 - movaps + andnps + andps + orps, result stored through %rdi.
;  - SSE2 - the same and / andn / or sequence, result left in %xmm0.
;  - XOP - two vmovdqa loads feeding one vpcmov that takes its remaining
;    operand straight from memory.
define <4 x i32> @in_v4i32(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) nounwind {
; CHECK-BASELINE-LABEL: in_v4i32:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: pushq %rbx
; CHECK-BASELINE-NEXT: movl 12(%rdx), %r8d
; CHECK-BASELINE-NEXT: movl 8(%rdx), %r9d
; CHECK-BASELINE-NEXT: movl (%rdx), %r11d
; CHECK-BASELINE-NEXT: movl 4(%rdx), %r10d
; CHECK-BASELINE-NEXT: movl (%rsi), %edx
; CHECK-BASELINE-NEXT: xorl %r11d, %edx
; CHECK-BASELINE-NEXT: movl 4(%rsi), %eax
; CHECK-BASELINE-NEXT: xorl %r10d, %eax
; CHECK-BASELINE-NEXT: movl 8(%rsi), %ebx
; CHECK-BASELINE-NEXT: xorl %r9d, %ebx
; CHECK-BASELINE-NEXT: movl 12(%rsi), %esi
; CHECK-BASELINE-NEXT: xorl %r8d, %esi
; CHECK-BASELINE-NEXT: andl 12(%rcx), %esi
; CHECK-BASELINE-NEXT: andl 8(%rcx), %ebx
; CHECK-BASELINE-NEXT: andl 4(%rcx), %eax
; CHECK-BASELINE-NEXT: andl (%rcx), %edx
; CHECK-BASELINE-NEXT: xorl %r11d, %edx
; CHECK-BASELINE-NEXT: xorl %r10d, %eax
; CHECK-BASELINE-NEXT: xorl %r9d, %ebx
; CHECK-BASELINE-NEXT: xorl %r8d, %esi
; CHECK-BASELINE-NEXT: movl %esi, 12(%rdi)
; CHECK-BASELINE-NEXT: movl %ebx, 8(%rdi)
; CHECK-BASELINE-NEXT: movl %eax, 4(%rdi)
; CHECK-BASELINE-NEXT: movl %edx, (%rdi)
; CHECK-BASELINE-NEXT: movq %rdi, %rax
; CHECK-BASELINE-NEXT: popq %rbx
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v4i32:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT: andps (%rsi), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v4i32:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps (%rsi), %xmm1
; CHECK-SSE2-NEXT: andps (%rdi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v4i32:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT: vpcmov %xmm1, (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> %x, %y
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; "In" form of the masked merge on <2 x i64>, all three operands passed by
; value. The body computes r = ((x ^ y) & mask) ^ y.
; Lowering pinned by the assertions below, one group per check prefix
;  - BASELINE and SSE1 - identical output, two 64-bit xorq / andq / xorq
;    chains entirely in general-purpose registers, with the two halves of
;    the result moved into %rax and %rdx.
;  - SSE2 - andps + andnps + orps on whole xmm registers.
;  - XOP - a single vpcmov.
define <2 x i64> @in_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
; CHECK-BASELINE-LABEL: in_v2i64:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: xorq %rdx, %rdi
; CHECK-BASELINE-NEXT: xorq %rcx, %rsi
; CHECK-BASELINE-NEXT: andq %r9, %rsi
; CHECK-BASELINE-NEXT: andq %r8, %rdi
; CHECK-BASELINE-NEXT: xorq %rdx, %rdi
; CHECK-BASELINE-NEXT: xorq %rcx, %rsi
; CHECK-BASELINE-NEXT: movq %rdi, %rax
; CHECK-BASELINE-NEXT: movq %rsi, %rdx
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v2i64:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: xorq %rdx, %rdi
; CHECK-SSE1-NEXT: xorq %rcx, %rsi
; CHECK-SSE1-NEXT: andq %r9, %rsi
; CHECK-SSE1-NEXT: andq %r8, %rdi
; CHECK-SSE1-NEXT: xorq %rdx, %rdi
; CHECK-SSE1-NEXT: xorq %rcx, %rsi
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movq %rsi, %rdx
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v2i64:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: andps %xmm2, %xmm0
; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v2i64:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %n0 = xor <2 x i64> %x, %y
  %n1 = and <2 x i64> %n0, %mask
  %r = xor <2 x i64> %n1, %y
  ret <2 x i64> %r
}

; ============================================================================ ;
; 256-bit vector width
; ============================================================================ ;

define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8>
*%pmask) nounwind { 3519 ; CHECK-BASELINE-LABEL: in_v32i8: 3520 ; CHECK-BASELINE: # %bb.0: 3521 ; CHECK-BASELINE-NEXT: pushq %rbp 3522 ; CHECK-BASELINE-NEXT: pushq %r15 3523 ; CHECK-BASELINE-NEXT: pushq %r14 3524 ; CHECK-BASELINE-NEXT: pushq %r13 3525 ; CHECK-BASELINE-NEXT: pushq %r12 3526 ; CHECK-BASELINE-NEXT: pushq %rbx 3527 ; CHECK-BASELINE-NEXT: movq %rdx, %r13 3528 ; CHECK-BASELINE-NEXT: movq %rsi, %rbx 3529 ; CHECK-BASELINE-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 3530 ; CHECK-BASELINE-NEXT: movb 16(%rdx), %r12b 3531 ; CHECK-BASELINE-NEXT: movb 15(%rdx), %al 3532 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3533 ; CHECK-BASELINE-NEXT: movb 14(%rdx), %al 3534 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3535 ; CHECK-BASELINE-NEXT: movb 13(%rdx), %al 3536 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3537 ; CHECK-BASELINE-NEXT: movb 12(%rdx), %al 3538 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3539 ; CHECK-BASELINE-NEXT: movb 11(%rdx), %al 3540 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3541 ; CHECK-BASELINE-NEXT: movb 10(%rdx), %al 3542 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3543 ; CHECK-BASELINE-NEXT: movb 9(%rdx), %r10b 3544 ; CHECK-BASELINE-NEXT: movb 8(%rdx), %r11b 3545 ; CHECK-BASELINE-NEXT: movb 7(%rdx), %r9b 3546 ; CHECK-BASELINE-NEXT: movb 6(%rdx), %r8b 3547 ; CHECK-BASELINE-NEXT: movb 5(%rdx), %bpl 3548 ; CHECK-BASELINE-NEXT: movb 4(%rdx), %dil 3549 ; CHECK-BASELINE-NEXT: movb 3(%rdx), %sil 3550 ; CHECK-BASELINE-NEXT: movb 2(%rdx), %r14b 3551 ; CHECK-BASELINE-NEXT: movb (%rdx), %al 3552 ; CHECK-BASELINE-NEXT: movb 1(%rdx), %r15b 3553 ; CHECK-BASELINE-NEXT: movb (%rbx), %dl 3554 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3555 ; CHECK-BASELINE-NEXT: andb (%rcx), %dl 3556 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3557 ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) 
# 1-byte Spill 3558 ; CHECK-BASELINE-NEXT: movb 1(%rbx), %al 3559 ; CHECK-BASELINE-NEXT: xorb %r15b, %al 3560 ; CHECK-BASELINE-NEXT: andb 1(%rcx), %al 3561 ; CHECK-BASELINE-NEXT: xorb %r15b, %al 3562 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3563 ; CHECK-BASELINE-NEXT: movb 2(%rbx), %al 3564 ; CHECK-BASELINE-NEXT: xorb %r14b, %al 3565 ; CHECK-BASELINE-NEXT: andb 2(%rcx), %al 3566 ; CHECK-BASELINE-NEXT: xorb %r14b, %al 3567 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3568 ; CHECK-BASELINE-NEXT: movb 3(%rbx), %al 3569 ; CHECK-BASELINE-NEXT: xorb %sil, %al 3570 ; CHECK-BASELINE-NEXT: andb 3(%rcx), %al 3571 ; CHECK-BASELINE-NEXT: xorb %sil, %al 3572 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3573 ; CHECK-BASELINE-NEXT: movb 4(%rbx), %al 3574 ; CHECK-BASELINE-NEXT: xorb %dil, %al 3575 ; CHECK-BASELINE-NEXT: andb 4(%rcx), %al 3576 ; CHECK-BASELINE-NEXT: xorb %dil, %al 3577 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3578 ; CHECK-BASELINE-NEXT: movb 5(%rbx), %al 3579 ; CHECK-BASELINE-NEXT: xorb %bpl, %al 3580 ; CHECK-BASELINE-NEXT: andb 5(%rcx), %al 3581 ; CHECK-BASELINE-NEXT: xorb %bpl, %al 3582 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3583 ; CHECK-BASELINE-NEXT: movb 6(%rbx), %al 3584 ; CHECK-BASELINE-NEXT: xorb %r8b, %al 3585 ; CHECK-BASELINE-NEXT: andb 6(%rcx), %al 3586 ; CHECK-BASELINE-NEXT: xorb %r8b, %al 3587 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3588 ; CHECK-BASELINE-NEXT: movb 7(%rbx), %al 3589 ; CHECK-BASELINE-NEXT: xorb %r9b, %al 3590 ; CHECK-BASELINE-NEXT: andb 7(%rcx), %al 3591 ; CHECK-BASELINE-NEXT: xorb %r9b, %al 3592 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3593 ; CHECK-BASELINE-NEXT: movb 8(%rbx), %al 3594 ; CHECK-BASELINE-NEXT: xorb %r11b, %al 3595 ; CHECK-BASELINE-NEXT: andb 8(%rcx), %al 3596 ; CHECK-BASELINE-NEXT: xorb %r11b, %al 3597 ; 
CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3598 ; CHECK-BASELINE-NEXT: movb 9(%rbx), %al 3599 ; CHECK-BASELINE-NEXT: xorb %r10b, %al 3600 ; CHECK-BASELINE-NEXT: andb 9(%rcx), %al 3601 ; CHECK-BASELINE-NEXT: xorb %r10b, %al 3602 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3603 ; CHECK-BASELINE-NEXT: movb 10(%rbx), %dl 3604 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3605 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3606 ; CHECK-BASELINE-NEXT: andb 10(%rcx), %dl 3607 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3608 ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3609 ; CHECK-BASELINE-NEXT: movb 11(%rbx), %dl 3610 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3611 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3612 ; CHECK-BASELINE-NEXT: andb 11(%rcx), %dl 3613 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3614 ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3615 ; CHECK-BASELINE-NEXT: movb 12(%rbx), %dl 3616 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3617 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3618 ; CHECK-BASELINE-NEXT: andb 12(%rcx), %dl 3619 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3620 ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3621 ; CHECK-BASELINE-NEXT: movb 13(%rbx), %dl 3622 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3623 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3624 ; CHECK-BASELINE-NEXT: andb 13(%rcx), %dl 3625 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3626 ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3627 ; CHECK-BASELINE-NEXT: movb 14(%rbx), %dl 3628 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3629 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3630 ; CHECK-BASELINE-NEXT: andb 14(%rcx), %dl 3631 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3632 ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte 
Spill 3633 ; CHECK-BASELINE-NEXT: movb 15(%rbx), %dl 3634 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3635 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3636 ; CHECK-BASELINE-NEXT: andb 15(%rcx), %dl 3637 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3638 ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3639 ; CHECK-BASELINE-NEXT: movb 16(%rbx), %al 3640 ; CHECK-BASELINE-NEXT: xorb %r12b, %al 3641 ; CHECK-BASELINE-NEXT: andb 16(%rcx), %al 3642 ; CHECK-BASELINE-NEXT: xorb %r12b, %al 3643 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3644 ; CHECK-BASELINE-NEXT: movb 17(%r13), %al 3645 ; CHECK-BASELINE-NEXT: movb 17(%rbx), %dl 3646 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3647 ; CHECK-BASELINE-NEXT: andb 17(%rcx), %dl 3648 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3649 ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3650 ; CHECK-BASELINE-NEXT: movb 18(%r13), %al 3651 ; CHECK-BASELINE-NEXT: movb 18(%rbx), %dl 3652 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3653 ; CHECK-BASELINE-NEXT: andb 18(%rcx), %dl 3654 ; CHECK-BASELINE-NEXT: xorb %al, %dl 3655 ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3656 ; CHECK-BASELINE-NEXT: movb 19(%r13), %al 3657 ; CHECK-BASELINE-NEXT: movb 19(%rbx), %r12b 3658 ; CHECK-BASELINE-NEXT: xorb %al, %r12b 3659 ; CHECK-BASELINE-NEXT: andb 19(%rcx), %r12b 3660 ; CHECK-BASELINE-NEXT: movq %rcx, %rdx 3661 ; CHECK-BASELINE-NEXT: xorb %al, %r12b 3662 ; CHECK-BASELINE-NEXT: movb 20(%r13), %al 3663 ; CHECK-BASELINE-NEXT: movb 20(%rbx), %r14b 3664 ; CHECK-BASELINE-NEXT: xorb %al, %r14b 3665 ; CHECK-BASELINE-NEXT: andb 20(%rcx), %r14b 3666 ; CHECK-BASELINE-NEXT: xorb %al, %r14b 3667 ; CHECK-BASELINE-NEXT: movb 21(%r13), %al 3668 ; CHECK-BASELINE-NEXT: movb 21(%rbx), %r15b 3669 ; CHECK-BASELINE-NEXT: xorb %al, %r15b 3670 ; CHECK-BASELINE-NEXT: andb 21(%rcx), %r15b 3671 ; CHECK-BASELINE-NEXT: xorb %al, %r15b 3672 ; CHECK-BASELINE-NEXT: movb 22(%r13), %al 
3673 ; CHECK-BASELINE-NEXT: movb 22(%rbx), %bpl 3674 ; CHECK-BASELINE-NEXT: xorb %al, %bpl 3675 ; CHECK-BASELINE-NEXT: andb 22(%rcx), %bpl 3676 ; CHECK-BASELINE-NEXT: xorb %al, %bpl 3677 ; CHECK-BASELINE-NEXT: movb 23(%r13), %al 3678 ; CHECK-BASELINE-NEXT: movb 23(%rbx), %r11b 3679 ; CHECK-BASELINE-NEXT: xorb %al, %r11b 3680 ; CHECK-BASELINE-NEXT: andb 23(%rcx), %r11b 3681 ; CHECK-BASELINE-NEXT: xorb %al, %r11b 3682 ; CHECK-BASELINE-NEXT: movb 24(%r13), %al 3683 ; CHECK-BASELINE-NEXT: movb 24(%rbx), %r10b 3684 ; CHECK-BASELINE-NEXT: xorb %al, %r10b 3685 ; CHECK-BASELINE-NEXT: andb 24(%rcx), %r10b 3686 ; CHECK-BASELINE-NEXT: xorb %al, %r10b 3687 ; CHECK-BASELINE-NEXT: movb 25(%r13), %al 3688 ; CHECK-BASELINE-NEXT: movb 25(%rbx), %r9b 3689 ; CHECK-BASELINE-NEXT: xorb %al, %r9b 3690 ; CHECK-BASELINE-NEXT: andb 25(%rcx), %r9b 3691 ; CHECK-BASELINE-NEXT: xorb %al, %r9b 3692 ; CHECK-BASELINE-NEXT: movb 26(%r13), %al 3693 ; CHECK-BASELINE-NEXT: movb 26(%rbx), %r8b 3694 ; CHECK-BASELINE-NEXT: xorb %al, %r8b 3695 ; CHECK-BASELINE-NEXT: andb 26(%rcx), %r8b 3696 ; CHECK-BASELINE-NEXT: xorb %al, %r8b 3697 ; CHECK-BASELINE-NEXT: movb 27(%r13), %al 3698 ; CHECK-BASELINE-NEXT: movb 27(%rbx), %dil 3699 ; CHECK-BASELINE-NEXT: xorb %al, %dil 3700 ; CHECK-BASELINE-NEXT: andb 27(%rcx), %dil 3701 ; CHECK-BASELINE-NEXT: xorb %al, %dil 3702 ; CHECK-BASELINE-NEXT: movb 28(%r13), %al 3703 ; CHECK-BASELINE-NEXT: movb 28(%rbx), %sil 3704 ; CHECK-BASELINE-NEXT: xorb %al, %sil 3705 ; CHECK-BASELINE-NEXT: andb 28(%rcx), %sil 3706 ; CHECK-BASELINE-NEXT: xorb %al, %sil 3707 ; CHECK-BASELINE-NEXT: movb 29(%r13), %al 3708 ; CHECK-BASELINE-NEXT: movb 29(%rbx), %cl 3709 ; CHECK-BASELINE-NEXT: xorb %al, %cl 3710 ; CHECK-BASELINE-NEXT: andb 29(%rdx), %cl 3711 ; CHECK-BASELINE-NEXT: xorb %al, %cl 3712 ; CHECK-BASELINE-NEXT: movb 30(%r13), %al 3713 ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3714 ; CHECK-BASELINE-NEXT: movb 30(%rbx), %al 3715 ; CHECK-BASELINE-NEXT: xorb 
{{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload 3716 ; CHECK-BASELINE-NEXT: andb 30(%rdx), %al 3717 ; CHECK-BASELINE-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload 3718 ; CHECK-BASELINE-NEXT: movb 31(%r13), %r13b 3719 ; CHECK-BASELINE-NEXT: movb 31(%rbx), %bl 3720 ; CHECK-BASELINE-NEXT: xorb %r13b, %bl 3721 ; CHECK-BASELINE-NEXT: andb 31(%rdx), %bl 3722 ; CHECK-BASELINE-NEXT: xorb %r13b, %bl 3723 ; CHECK-BASELINE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload 3724 ; CHECK-BASELINE-NEXT: movb %bl, 31(%r13) 3725 ; CHECK-BASELINE-NEXT: movb %al, 30(%r13) 3726 ; CHECK-BASELINE-NEXT: movb %cl, 29(%r13) 3727 ; CHECK-BASELINE-NEXT: movb %sil, 28(%r13) 3728 ; CHECK-BASELINE-NEXT: movb %dil, 27(%r13) 3729 ; CHECK-BASELINE-NEXT: movb %r8b, 26(%r13) 3730 ; CHECK-BASELINE-NEXT: movb %r9b, 25(%r13) 3731 ; CHECK-BASELINE-NEXT: movb %r10b, 24(%r13) 3732 ; CHECK-BASELINE-NEXT: movb %r11b, 23(%r13) 3733 ; CHECK-BASELINE-NEXT: movb %bpl, 22(%r13) 3734 ; CHECK-BASELINE-NEXT: movb %r15b, 21(%r13) 3735 ; CHECK-BASELINE-NEXT: movb %r14b, 20(%r13) 3736 ; CHECK-BASELINE-NEXT: movb %r12b, 19(%r13) 3737 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3738 ; CHECK-BASELINE-NEXT: movb %al, 18(%r13) 3739 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3740 ; CHECK-BASELINE-NEXT: movb %al, 17(%r13) 3741 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3742 ; CHECK-BASELINE-NEXT: movb %al, 16(%r13) 3743 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3744 ; CHECK-BASELINE-NEXT: movb %al, 15(%r13) 3745 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3746 ; CHECK-BASELINE-NEXT: movb %al, 14(%r13) 3747 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3748 ; CHECK-BASELINE-NEXT: movb %al, 13(%r13) 3749 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3750 ; CHECK-BASELINE-NEXT: movb %al, 
12(%r13) 3751 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3752 ; CHECK-BASELINE-NEXT: movb %al, 11(%r13) 3753 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3754 ; CHECK-BASELINE-NEXT: movb %al, 10(%r13) 3755 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3756 ; CHECK-BASELINE-NEXT: movb %al, 9(%r13) 3757 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3758 ; CHECK-BASELINE-NEXT: movb %al, 8(%r13) 3759 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3760 ; CHECK-BASELINE-NEXT: movb %al, 7(%r13) 3761 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3762 ; CHECK-BASELINE-NEXT: movb %al, 6(%r13) 3763 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3764 ; CHECK-BASELINE-NEXT: movb %al, 5(%r13) 3765 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3766 ; CHECK-BASELINE-NEXT: movb %al, 4(%r13) 3767 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3768 ; CHECK-BASELINE-NEXT: movb %al, 3(%r13) 3769 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3770 ; CHECK-BASELINE-NEXT: movb %al, 2(%r13) 3771 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3772 ; CHECK-BASELINE-NEXT: movb %al, 1(%r13) 3773 ; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3774 ; CHECK-BASELINE-NEXT: movb %al, (%r13) 3775 ; CHECK-BASELINE-NEXT: movq %r13, %rax 3776 ; CHECK-BASELINE-NEXT: popq %rbx 3777 ; CHECK-BASELINE-NEXT: popq %r12 3778 ; CHECK-BASELINE-NEXT: popq %r13 3779 ; CHECK-BASELINE-NEXT: popq %r14 3780 ; CHECK-BASELINE-NEXT: popq %r15 3781 ; CHECK-BASELINE-NEXT: popq %rbp 3782 ; CHECK-BASELINE-NEXT: retq 3783 ; 3784 ; CHECK-SSE1-LABEL: in_v32i8: 3785 ; CHECK-SSE1: # %bb.0: 3786 ; CHECK-SSE1-NEXT: pushq %rbp 3787 ; CHECK-SSE1-NEXT: pushq %r15 3788 ; CHECK-SSE1-NEXT: pushq %r14 3789 ; 
CHECK-SSE1-NEXT: pushq %r13 3790 ; CHECK-SSE1-NEXT: pushq %r12 3791 ; CHECK-SSE1-NEXT: pushq %rbx 3792 ; CHECK-SSE1-NEXT: movq %rdx, %r13 3793 ; CHECK-SSE1-NEXT: movq %rsi, %rbx 3794 ; CHECK-SSE1-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 3795 ; CHECK-SSE1-NEXT: movb 16(%rdx), %r12b 3796 ; CHECK-SSE1-NEXT: movb 15(%rdx), %al 3797 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3798 ; CHECK-SSE1-NEXT: movb 14(%rdx), %al 3799 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3800 ; CHECK-SSE1-NEXT: movb 13(%rdx), %al 3801 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3802 ; CHECK-SSE1-NEXT: movb 12(%rdx), %al 3803 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3804 ; CHECK-SSE1-NEXT: movb 11(%rdx), %al 3805 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3806 ; CHECK-SSE1-NEXT: movb 10(%rdx), %al 3807 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3808 ; CHECK-SSE1-NEXT: movb 9(%rdx), %r10b 3809 ; CHECK-SSE1-NEXT: movb 8(%rdx), %r11b 3810 ; CHECK-SSE1-NEXT: movb 7(%rdx), %r9b 3811 ; CHECK-SSE1-NEXT: movb 6(%rdx), %r8b 3812 ; CHECK-SSE1-NEXT: movb 5(%rdx), %bpl 3813 ; CHECK-SSE1-NEXT: movb 4(%rdx), %dil 3814 ; CHECK-SSE1-NEXT: movb 3(%rdx), %sil 3815 ; CHECK-SSE1-NEXT: movb 2(%rdx), %r14b 3816 ; CHECK-SSE1-NEXT: movb (%rdx), %al 3817 ; CHECK-SSE1-NEXT: movb 1(%rdx), %r15b 3818 ; CHECK-SSE1-NEXT: movb (%rbx), %dl 3819 ; CHECK-SSE1-NEXT: xorb %al, %dl 3820 ; CHECK-SSE1-NEXT: andb (%rcx), %dl 3821 ; CHECK-SSE1-NEXT: xorb %al, %dl 3822 ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3823 ; CHECK-SSE1-NEXT: movb 1(%rbx), %al 3824 ; CHECK-SSE1-NEXT: xorb %r15b, %al 3825 ; CHECK-SSE1-NEXT: andb 1(%rcx), %al 3826 ; CHECK-SSE1-NEXT: xorb %r15b, %al 3827 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3828 ; CHECK-SSE1-NEXT: movb 2(%rbx), %al 3829 ; CHECK-SSE1-NEXT: xorb %r14b, %al 3830 ; 
CHECK-SSE1-NEXT: andb 2(%rcx), %al 3831 ; CHECK-SSE1-NEXT: xorb %r14b, %al 3832 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3833 ; CHECK-SSE1-NEXT: movb 3(%rbx), %al 3834 ; CHECK-SSE1-NEXT: xorb %sil, %al 3835 ; CHECK-SSE1-NEXT: andb 3(%rcx), %al 3836 ; CHECK-SSE1-NEXT: xorb %sil, %al 3837 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3838 ; CHECK-SSE1-NEXT: movb 4(%rbx), %al 3839 ; CHECK-SSE1-NEXT: xorb %dil, %al 3840 ; CHECK-SSE1-NEXT: andb 4(%rcx), %al 3841 ; CHECK-SSE1-NEXT: xorb %dil, %al 3842 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3843 ; CHECK-SSE1-NEXT: movb 5(%rbx), %al 3844 ; CHECK-SSE1-NEXT: xorb %bpl, %al 3845 ; CHECK-SSE1-NEXT: andb 5(%rcx), %al 3846 ; CHECK-SSE1-NEXT: xorb %bpl, %al 3847 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3848 ; CHECK-SSE1-NEXT: movb 6(%rbx), %al 3849 ; CHECK-SSE1-NEXT: xorb %r8b, %al 3850 ; CHECK-SSE1-NEXT: andb 6(%rcx), %al 3851 ; CHECK-SSE1-NEXT: xorb %r8b, %al 3852 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3853 ; CHECK-SSE1-NEXT: movb 7(%rbx), %al 3854 ; CHECK-SSE1-NEXT: xorb %r9b, %al 3855 ; CHECK-SSE1-NEXT: andb 7(%rcx), %al 3856 ; CHECK-SSE1-NEXT: xorb %r9b, %al 3857 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3858 ; CHECK-SSE1-NEXT: movb 8(%rbx), %al 3859 ; CHECK-SSE1-NEXT: xorb %r11b, %al 3860 ; CHECK-SSE1-NEXT: andb 8(%rcx), %al 3861 ; CHECK-SSE1-NEXT: xorb %r11b, %al 3862 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3863 ; CHECK-SSE1-NEXT: movb 9(%rbx), %al 3864 ; CHECK-SSE1-NEXT: xorb %r10b, %al 3865 ; CHECK-SSE1-NEXT: andb 9(%rcx), %al 3866 ; CHECK-SSE1-NEXT: xorb %r10b, %al 3867 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3868 ; CHECK-SSE1-NEXT: movb 10(%rbx), %dl 3869 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3870 ; CHECK-SSE1-NEXT: xorb %al, %dl 3871 ; CHECK-SSE1-NEXT: andb 10(%rcx), 
%dl 3872 ; CHECK-SSE1-NEXT: xorb %al, %dl 3873 ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3874 ; CHECK-SSE1-NEXT: movb 11(%rbx), %dl 3875 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3876 ; CHECK-SSE1-NEXT: xorb %al, %dl 3877 ; CHECK-SSE1-NEXT: andb 11(%rcx), %dl 3878 ; CHECK-SSE1-NEXT: xorb %al, %dl 3879 ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3880 ; CHECK-SSE1-NEXT: movb 12(%rbx), %dl 3881 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3882 ; CHECK-SSE1-NEXT: xorb %al, %dl 3883 ; CHECK-SSE1-NEXT: andb 12(%rcx), %dl 3884 ; CHECK-SSE1-NEXT: xorb %al, %dl 3885 ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3886 ; CHECK-SSE1-NEXT: movb 13(%rbx), %dl 3887 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3888 ; CHECK-SSE1-NEXT: xorb %al, %dl 3889 ; CHECK-SSE1-NEXT: andb 13(%rcx), %dl 3890 ; CHECK-SSE1-NEXT: xorb %al, %dl 3891 ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3892 ; CHECK-SSE1-NEXT: movb 14(%rbx), %dl 3893 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3894 ; CHECK-SSE1-NEXT: xorb %al, %dl 3895 ; CHECK-SSE1-NEXT: andb 14(%rcx), %dl 3896 ; CHECK-SSE1-NEXT: xorb %al, %dl 3897 ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3898 ; CHECK-SSE1-NEXT: movb 15(%rbx), %dl 3899 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3900 ; CHECK-SSE1-NEXT: xorb %al, %dl 3901 ; CHECK-SSE1-NEXT: andb 15(%rcx), %dl 3902 ; CHECK-SSE1-NEXT: xorb %al, %dl 3903 ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3904 ; CHECK-SSE1-NEXT: movb 16(%rbx), %al 3905 ; CHECK-SSE1-NEXT: xorb %r12b, %al 3906 ; CHECK-SSE1-NEXT: andb 16(%rcx), %al 3907 ; CHECK-SSE1-NEXT: xorb %r12b, %al 3908 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3909 ; CHECK-SSE1-NEXT: movb 17(%r13), %al 3910 ; CHECK-SSE1-NEXT: movb 17(%rbx), %dl 
3911 ; CHECK-SSE1-NEXT: xorb %al, %dl 3912 ; CHECK-SSE1-NEXT: andb 17(%rcx), %dl 3913 ; CHECK-SSE1-NEXT: xorb %al, %dl 3914 ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3915 ; CHECK-SSE1-NEXT: movb 18(%r13), %al 3916 ; CHECK-SSE1-NEXT: movb 18(%rbx), %dl 3917 ; CHECK-SSE1-NEXT: xorb %al, %dl 3918 ; CHECK-SSE1-NEXT: andb 18(%rcx), %dl 3919 ; CHECK-SSE1-NEXT: xorb %al, %dl 3920 ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3921 ; CHECK-SSE1-NEXT: movb 19(%r13), %al 3922 ; CHECK-SSE1-NEXT: movb 19(%rbx), %r12b 3923 ; CHECK-SSE1-NEXT: xorb %al, %r12b 3924 ; CHECK-SSE1-NEXT: andb 19(%rcx), %r12b 3925 ; CHECK-SSE1-NEXT: movq %rcx, %rdx 3926 ; CHECK-SSE1-NEXT: xorb %al, %r12b 3927 ; CHECK-SSE1-NEXT: movb 20(%r13), %al 3928 ; CHECK-SSE1-NEXT: movb 20(%rbx), %r14b 3929 ; CHECK-SSE1-NEXT: xorb %al, %r14b 3930 ; CHECK-SSE1-NEXT: andb 20(%rcx), %r14b 3931 ; CHECK-SSE1-NEXT: xorb %al, %r14b 3932 ; CHECK-SSE1-NEXT: movb 21(%r13), %al 3933 ; CHECK-SSE1-NEXT: movb 21(%rbx), %r15b 3934 ; CHECK-SSE1-NEXT: xorb %al, %r15b 3935 ; CHECK-SSE1-NEXT: andb 21(%rcx), %r15b 3936 ; CHECK-SSE1-NEXT: xorb %al, %r15b 3937 ; CHECK-SSE1-NEXT: movb 22(%r13), %al 3938 ; CHECK-SSE1-NEXT: movb 22(%rbx), %bpl 3939 ; CHECK-SSE1-NEXT: xorb %al, %bpl 3940 ; CHECK-SSE1-NEXT: andb 22(%rcx), %bpl 3941 ; CHECK-SSE1-NEXT: xorb %al, %bpl 3942 ; CHECK-SSE1-NEXT: movb 23(%r13), %al 3943 ; CHECK-SSE1-NEXT: movb 23(%rbx), %r11b 3944 ; CHECK-SSE1-NEXT: xorb %al, %r11b 3945 ; CHECK-SSE1-NEXT: andb 23(%rcx), %r11b 3946 ; CHECK-SSE1-NEXT: xorb %al, %r11b 3947 ; CHECK-SSE1-NEXT: movb 24(%r13), %al 3948 ; CHECK-SSE1-NEXT: movb 24(%rbx), %r10b 3949 ; CHECK-SSE1-NEXT: xorb %al, %r10b 3950 ; CHECK-SSE1-NEXT: andb 24(%rcx), %r10b 3951 ; CHECK-SSE1-NEXT: xorb %al, %r10b 3952 ; CHECK-SSE1-NEXT: movb 25(%r13), %al 3953 ; CHECK-SSE1-NEXT: movb 25(%rbx), %r9b 3954 ; CHECK-SSE1-NEXT: xorb %al, %r9b 3955 ; CHECK-SSE1-NEXT: andb 25(%rcx), %r9b 3956 ; CHECK-SSE1-NEXT: xorb %al, %r9b 3957 
; CHECK-SSE1-NEXT: movb 26(%r13), %al 3958 ; CHECK-SSE1-NEXT: movb 26(%rbx), %r8b 3959 ; CHECK-SSE1-NEXT: xorb %al, %r8b 3960 ; CHECK-SSE1-NEXT: andb 26(%rcx), %r8b 3961 ; CHECK-SSE1-NEXT: xorb %al, %r8b 3962 ; CHECK-SSE1-NEXT: movb 27(%r13), %al 3963 ; CHECK-SSE1-NEXT: movb 27(%rbx), %dil 3964 ; CHECK-SSE1-NEXT: xorb %al, %dil 3965 ; CHECK-SSE1-NEXT: andb 27(%rcx), %dil 3966 ; CHECK-SSE1-NEXT: xorb %al, %dil 3967 ; CHECK-SSE1-NEXT: movb 28(%r13), %al 3968 ; CHECK-SSE1-NEXT: movb 28(%rbx), %sil 3969 ; CHECK-SSE1-NEXT: xorb %al, %sil 3970 ; CHECK-SSE1-NEXT: andb 28(%rcx), %sil 3971 ; CHECK-SSE1-NEXT: xorb %al, %sil 3972 ; CHECK-SSE1-NEXT: movb 29(%r13), %al 3973 ; CHECK-SSE1-NEXT: movb 29(%rbx), %cl 3974 ; CHECK-SSE1-NEXT: xorb %al, %cl 3975 ; CHECK-SSE1-NEXT: andb 29(%rdx), %cl 3976 ; CHECK-SSE1-NEXT: xorb %al, %cl 3977 ; CHECK-SSE1-NEXT: movb 30(%r13), %al 3978 ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3979 ; CHECK-SSE1-NEXT: movb 30(%rbx), %al 3980 ; CHECK-SSE1-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload 3981 ; CHECK-SSE1-NEXT: andb 30(%rdx), %al 3982 ; CHECK-SSE1-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload 3983 ; CHECK-SSE1-NEXT: movb 31(%r13), %r13b 3984 ; CHECK-SSE1-NEXT: movb 31(%rbx), %bl 3985 ; CHECK-SSE1-NEXT: xorb %r13b, %bl 3986 ; CHECK-SSE1-NEXT: andb 31(%rdx), %bl 3987 ; CHECK-SSE1-NEXT: xorb %r13b, %bl 3988 ; CHECK-SSE1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload 3989 ; CHECK-SSE1-NEXT: movb %bl, 31(%r13) 3990 ; CHECK-SSE1-NEXT: movb %al, 30(%r13) 3991 ; CHECK-SSE1-NEXT: movb %cl, 29(%r13) 3992 ; CHECK-SSE1-NEXT: movb %sil, 28(%r13) 3993 ; CHECK-SSE1-NEXT: movb %dil, 27(%r13) 3994 ; CHECK-SSE1-NEXT: movb %r8b, 26(%r13) 3995 ; CHECK-SSE1-NEXT: movb %r9b, 25(%r13) 3996 ; CHECK-SSE1-NEXT: movb %r10b, 24(%r13) 3997 ; CHECK-SSE1-NEXT: movb %r11b, 23(%r13) 3998 ; CHECK-SSE1-NEXT: movb %bpl, 22(%r13) 3999 ; CHECK-SSE1-NEXT: movb %r15b, 21(%r13) 4000 ; CHECK-SSE1-NEXT: movb 
%r14b, 20(%r13) 4001 ; CHECK-SSE1-NEXT: movb %r12b, 19(%r13) 4002 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4003 ; CHECK-SSE1-NEXT: movb %al, 18(%r13) 4004 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4005 ; CHECK-SSE1-NEXT: movb %al, 17(%r13) 4006 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4007 ; CHECK-SSE1-NEXT: movb %al, 16(%r13) 4008 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4009 ; CHECK-SSE1-NEXT: movb %al, 15(%r13) 4010 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4011 ; CHECK-SSE1-NEXT: movb %al, 14(%r13) 4012 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4013 ; CHECK-SSE1-NEXT: movb %al, 13(%r13) 4014 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4015 ; CHECK-SSE1-NEXT: movb %al, 12(%r13) 4016 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4017 ; CHECK-SSE1-NEXT: movb %al, 11(%r13) 4018 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4019 ; CHECK-SSE1-NEXT: movb %al, 10(%r13) 4020 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4021 ; CHECK-SSE1-NEXT: movb %al, 9(%r13) 4022 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4023 ; CHECK-SSE1-NEXT: movb %al, 8(%r13) 4024 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4025 ; CHECK-SSE1-NEXT: movb %al, 7(%r13) 4026 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4027 ; CHECK-SSE1-NEXT: movb %al, 6(%r13) 4028 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4029 ; CHECK-SSE1-NEXT: movb %al, 5(%r13) 4030 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4031 ; CHECK-SSE1-NEXT: movb %al, 4(%r13) 4032 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4033 ; CHECK-SSE1-NEXT: movb %al, 3(%r13) 4034 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), 
%al # 1-byte Reload 4035 ; CHECK-SSE1-NEXT: movb %al, 2(%r13) 4036 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4037 ; CHECK-SSE1-NEXT: movb %al, 1(%r13) 4038 ; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4039 ; CHECK-SSE1-NEXT: movb %al, (%r13) 4040 ; CHECK-SSE1-NEXT: movq %r13, %rax 4041 ; CHECK-SSE1-NEXT: popq %rbx 4042 ; CHECK-SSE1-NEXT: popq %r12 4043 ; CHECK-SSE1-NEXT: popq %r13 4044 ; CHECK-SSE1-NEXT: popq %r14 4045 ; CHECK-SSE1-NEXT: popq %r15 4046 ; CHECK-SSE1-NEXT: popq %rbp 4047 ; CHECK-SSE1-NEXT: retq 4048 ; 4049 ; CHECK-SSE2-LABEL: in_v32i8: 4050 ; CHECK-SSE2: # %bb.0: 4051 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 4052 ; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 4053 ; CHECK-SSE2-NEXT: movaps %xmm0, %xmm2 4054 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm2 4055 ; CHECK-SSE2-NEXT: andps (%rdi), %xmm0 4056 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 4057 ; CHECK-SSE2-NEXT: movaps %xmm1, %xmm2 4058 ; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm2 4059 ; CHECK-SSE2-NEXT: andps 16(%rdi), %xmm1 4060 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 4061 ; CHECK-SSE2-NEXT: retq 4062 ; 4063 ; CHECK-XOP-LABEL: in_v32i8: 4064 ; CHECK-XOP: # %bb.0: 4065 ; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 4066 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 4067 ; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 4068 ; CHECK-XOP-NEXT: retq 4069 %x = load <32 x i8>, <32 x i8> *%px, align 32 4070 %y = load <32 x i8>, <32 x i8> *%py, align 32 4071 %mask = load <32 x i8>, <32 x i8> *%pmask, align 32 4072 %n0 = xor <32 x i8> %x, %y 4073 %n1 = and <32 x i8> %n0, %mask 4074 %r = xor <32 x i8> %n1, %y 4075 ret <32 x i8> %r 4076 } 4077 4078 define <16 x i16> @in_v16i16(<16 x i16> *%px, <16 x i16> *%py, <16 x i16> *%pmask) nounwind { 4079 ; CHECK-BASELINE-LABEL: in_v16i16: 4080 ; CHECK-BASELINE: # %bb.0: 4081 ; CHECK-BASELINE-NEXT: pushq %rbp 4082 ; CHECK-BASELINE-NEXT: pushq %r15 4083 ; CHECK-BASELINE-NEXT: pushq %r14 4084 ; CHECK-BASELINE-NEXT: pushq %r13 4085 ; 
CHECK-BASELINE-NEXT: pushq %r12 4086 ; CHECK-BASELINE-NEXT: pushq %rbx 4087 ; CHECK-BASELINE-NEXT: movq %rcx, %r8 4088 ; CHECK-BASELINE-NEXT: movzwl 30(%rdx), %eax 4089 ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4090 ; CHECK-BASELINE-NEXT: movl 28(%rdx), %eax 4091 ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4092 ; CHECK-BASELINE-NEXT: movzwl 26(%rdx), %eax 4093 ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4094 ; CHECK-BASELINE-NEXT: movl 24(%rdx), %eax 4095 ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4096 ; CHECK-BASELINE-NEXT: movzwl 22(%rdx), %eax 4097 ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4098 ; CHECK-BASELINE-NEXT: movl 20(%rdx), %r13d 4099 ; CHECK-BASELINE-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4100 ; CHECK-BASELINE-NEXT: movzwl 18(%rdx), %r11d 4101 ; CHECK-BASELINE-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4102 ; CHECK-BASELINE-NEXT: movl 16(%rdx), %r14d 4103 ; CHECK-BASELINE-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4104 ; CHECK-BASELINE-NEXT: movzwl 14(%rdx), %r15d 4105 ; CHECK-BASELINE-NEXT: movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4106 ; CHECK-BASELINE-NEXT: movl 12(%rdx), %r12d 4107 ; CHECK-BASELINE-NEXT: movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4108 ; CHECK-BASELINE-NEXT: movzwl 10(%rdx), %r10d 4109 ; CHECK-BASELINE-NEXT: movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4110 ; CHECK-BASELINE-NEXT: movl 8(%rdx), %r9d 4111 ; CHECK-BASELINE-NEXT: movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4112 ; CHECK-BASELINE-NEXT: movzwl 6(%rdx), %ebx 4113 ; CHECK-BASELINE-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4114 ; CHECK-BASELINE-NEXT: movl (%rdx), %eax 4115 ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4116 ; CHECK-BASELINE-NEXT: movl 4(%rdx), %ebp 4117 ; CHECK-BASELINE-NEXT: movl %ebp, 
{{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4118 ; CHECK-BASELINE-NEXT: movzwl 2(%rdx), %ecx 4119 ; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4120 ; CHECK-BASELINE-NEXT: movzwl (%rsi), %edx 4121 ; CHECK-BASELINE-NEXT: xorw %ax, %dx 4122 ; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4123 ; CHECK-BASELINE-NEXT: movzwl 2(%rsi), %eax 4124 ; CHECK-BASELINE-NEXT: xorw %cx, %ax 4125 ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4126 ; CHECK-BASELINE-NEXT: movzwl 4(%rsi), %eax 4127 ; CHECK-BASELINE-NEXT: xorw %bp, %ax 4128 ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4129 ; CHECK-BASELINE-NEXT: movzwl 6(%rsi), %edx 4130 ; CHECK-BASELINE-NEXT: xorw %bx, %dx 4131 ; CHECK-BASELINE-NEXT: movl %edx, %eax 4132 ; CHECK-BASELINE-NEXT: movzwl 8(%rsi), %ecx 4133 ; CHECK-BASELINE-NEXT: xorw %r9w, %cx 4134 ; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4135 ; CHECK-BASELINE-NEXT: movzwl 10(%rsi), %edx 4136 ; CHECK-BASELINE-NEXT: xorw %r10w, %dx 4137 ; CHECK-BASELINE-NEXT: movl %edx, %ecx 4138 ; CHECK-BASELINE-NEXT: movzwl 12(%rsi), %edx 4139 ; CHECK-BASELINE-NEXT: xorw %r12w, %dx 4140 ; CHECK-BASELINE-NEXT: movzwl 14(%rsi), %r12d 4141 ; CHECK-BASELINE-NEXT: xorw %r15w, %r12w 4142 ; CHECK-BASELINE-NEXT: movzwl 16(%rsi), %r15d 4143 ; CHECK-BASELINE-NEXT: xorw %r14w, %r15w 4144 ; CHECK-BASELINE-NEXT: movzwl 18(%rsi), %r14d 4145 ; CHECK-BASELINE-NEXT: xorw %r11w, %r14w 4146 ; CHECK-BASELINE-NEXT: movzwl 20(%rsi), %ebp 4147 ; CHECK-BASELINE-NEXT: xorw %r13w, %bp 4148 ; CHECK-BASELINE-NEXT: movzwl 22(%rsi), %ebx 4149 ; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bx # 2-byte Folded Reload 4150 ; CHECK-BASELINE-NEXT: movzwl 24(%rsi), %r11d 4151 ; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r11w # 2-byte Folded Reload 4152 ; CHECK-BASELINE-NEXT: movzwl 26(%rsi), %r10d 4153 ; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r10w # 2-byte 
Folded Reload 4154 ; CHECK-BASELINE-NEXT: movzwl 28(%rsi), %r9d 4155 ; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r9w # 2-byte Folded Reload 4156 ; CHECK-BASELINE-NEXT: movzwl 30(%rsi), %r13d 4157 ; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r13w # 2-byte Folded Reload 4158 ; CHECK-BASELINE-NEXT: andw 30(%r8), %r13w 4159 ; CHECK-BASELINE-NEXT: andw 28(%r8), %r9w 4160 ; CHECK-BASELINE-NEXT: andw 26(%r8), %r10w 4161 ; CHECK-BASELINE-NEXT: andw 24(%r8), %r11w 4162 ; CHECK-BASELINE-NEXT: andw 22(%r8), %bx 4163 ; CHECK-BASELINE-NEXT: andw 20(%r8), %bp 4164 ; CHECK-BASELINE-NEXT: andw 18(%r8), %r14w 4165 ; CHECK-BASELINE-NEXT: andw 16(%r8), %r15w 4166 ; CHECK-BASELINE-NEXT: andw 14(%r8), %r12w 4167 ; CHECK-BASELINE-NEXT: andw 12(%r8), %dx 4168 ; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4169 ; CHECK-BASELINE-NEXT: andw 10(%r8), %cx 4170 ; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4171 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 4172 ; CHECK-BASELINE-NEXT: andw 8(%r8), %dx 4173 ; CHECK-BASELINE-NEXT: andw 6(%r8), %ax 4174 ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4175 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 4176 ; CHECK-BASELINE-NEXT: andw 4(%r8), %cx 4177 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 4178 ; CHECK-BASELINE-NEXT: andw 2(%r8), %ax 4179 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload 4180 ; CHECK-BASELINE-NEXT: andw (%r8), %si 4181 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Folded Reload 4182 ; CHECK-BASELINE-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4183 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload 4184 ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4185 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte 
Folded Reload 4186 ; CHECK-BASELINE-NEXT: movl %ecx, %esi 4187 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 4188 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload 4189 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload 4190 ; CHECK-BASELINE-NEXT: movl %edx, %r8d 4191 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 4192 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload 4193 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 4194 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload 4195 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 4-byte Folded Reload 4196 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Folded Reload 4197 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload 4198 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload 4199 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload 4200 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload 4201 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload 4202 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Folded Reload 4203 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload 4204 ; CHECK-BASELINE-NEXT: movw %r13w, 30(%rdi) 4205 ; CHECK-BASELINE-NEXT: movw %r9w, 28(%rdi) 4206 ; CHECK-BASELINE-NEXT: movw %r10w, 26(%rdi) 4207 ; CHECK-BASELINE-NEXT: movw %r11w, 24(%rdi) 4208 ; CHECK-BASELINE-NEXT: movw %bx, 22(%rdi) 4209 ; CHECK-BASELINE-NEXT: movw %bp, 20(%rdi) 4210 ; CHECK-BASELINE-NEXT: movw %r14w, 18(%rdi) 4211 ; CHECK-BASELINE-NEXT: movw %r15w, 16(%rdi) 4212 ; CHECK-BASELINE-NEXT: movw %r12w, 14(%rdi) 4213 ; CHECK-BASELINE-NEXT: movw %ax, 12(%rdi) 4214 ; CHECK-BASELINE-NEXT: movw %dx, 
10(%rdi) 4215 ; CHECK-BASELINE-NEXT: movw %r8w, 8(%rdi) 4216 ; CHECK-BASELINE-NEXT: movw %cx, 6(%rdi) 4217 ; CHECK-BASELINE-NEXT: movw %si, 4(%rdi) 4218 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 4219 ; CHECK-BASELINE-NEXT: movw %ax, 2(%rdi) 4220 ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 4221 ; CHECK-BASELINE-NEXT: movw %ax, (%rdi) 4222 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 4223 ; CHECK-BASELINE-NEXT: popq %rbx 4224 ; CHECK-BASELINE-NEXT: popq %r12 4225 ; CHECK-BASELINE-NEXT: popq %r13 4226 ; CHECK-BASELINE-NEXT: popq %r14 4227 ; CHECK-BASELINE-NEXT: popq %r15 4228 ; CHECK-BASELINE-NEXT: popq %rbp 4229 ; CHECK-BASELINE-NEXT: retq 4230 ; 4231 ; CHECK-SSE1-LABEL: in_v16i16: 4232 ; CHECK-SSE1: # %bb.0: 4233 ; CHECK-SSE1-NEXT: pushq %rbp 4234 ; CHECK-SSE1-NEXT: pushq %r15 4235 ; CHECK-SSE1-NEXT: pushq %r14 4236 ; CHECK-SSE1-NEXT: pushq %r13 4237 ; CHECK-SSE1-NEXT: pushq %r12 4238 ; CHECK-SSE1-NEXT: pushq %rbx 4239 ; CHECK-SSE1-NEXT: movq %rcx, %r8 4240 ; CHECK-SSE1-NEXT: movzwl 30(%rdx), %eax 4241 ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4242 ; CHECK-SSE1-NEXT: movl 28(%rdx), %eax 4243 ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4244 ; CHECK-SSE1-NEXT: movzwl 26(%rdx), %eax 4245 ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4246 ; CHECK-SSE1-NEXT: movl 24(%rdx), %eax 4247 ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4248 ; CHECK-SSE1-NEXT: movzwl 22(%rdx), %eax 4249 ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4250 ; CHECK-SSE1-NEXT: movl 20(%rdx), %r13d 4251 ; CHECK-SSE1-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4252 ; CHECK-SSE1-NEXT: movzwl 18(%rdx), %r11d 4253 ; CHECK-SSE1-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4254 ; CHECK-SSE1-NEXT: movl 16(%rdx), %r14d 4255 ; CHECK-SSE1-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4256 ; 
CHECK-SSE1-NEXT: movzwl 14(%rdx), %r15d 4257 ; CHECK-SSE1-NEXT: movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4258 ; CHECK-SSE1-NEXT: movl 12(%rdx), %r12d 4259 ; CHECK-SSE1-NEXT: movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4260 ; CHECK-SSE1-NEXT: movzwl 10(%rdx), %r10d 4261 ; CHECK-SSE1-NEXT: movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4262 ; CHECK-SSE1-NEXT: movl 8(%rdx), %r9d 4263 ; CHECK-SSE1-NEXT: movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4264 ; CHECK-SSE1-NEXT: movzwl 6(%rdx), %ebx 4265 ; CHECK-SSE1-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4266 ; CHECK-SSE1-NEXT: movl (%rdx), %eax 4267 ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4268 ; CHECK-SSE1-NEXT: movl 4(%rdx), %ebp 4269 ; CHECK-SSE1-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4270 ; CHECK-SSE1-NEXT: movzwl 2(%rdx), %ecx 4271 ; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4272 ; CHECK-SSE1-NEXT: movzwl (%rsi), %edx 4273 ; CHECK-SSE1-NEXT: xorw %ax, %dx 4274 ; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4275 ; CHECK-SSE1-NEXT: movzwl 2(%rsi), %eax 4276 ; CHECK-SSE1-NEXT: xorw %cx, %ax 4277 ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4278 ; CHECK-SSE1-NEXT: movzwl 4(%rsi), %eax 4279 ; CHECK-SSE1-NEXT: xorw %bp, %ax 4280 ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4281 ; CHECK-SSE1-NEXT: movzwl 6(%rsi), %edx 4282 ; CHECK-SSE1-NEXT: xorw %bx, %dx 4283 ; CHECK-SSE1-NEXT: movl %edx, %eax 4284 ; CHECK-SSE1-NEXT: movzwl 8(%rsi), %ecx 4285 ; CHECK-SSE1-NEXT: xorw %r9w, %cx 4286 ; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4287 ; CHECK-SSE1-NEXT: movzwl 10(%rsi), %edx 4288 ; CHECK-SSE1-NEXT: xorw %r10w, %dx 4289 ; CHECK-SSE1-NEXT: movl %edx, %ecx 4290 ; CHECK-SSE1-NEXT: movzwl 12(%rsi), %edx 4291 ; CHECK-SSE1-NEXT: xorw %r12w, %dx 4292 ; CHECK-SSE1-NEXT: movzwl 14(%rsi), %r12d 4293 ; CHECK-SSE1-NEXT: xorw 
%r15w, %r12w 4294 ; CHECK-SSE1-NEXT: movzwl 16(%rsi), %r15d 4295 ; CHECK-SSE1-NEXT: xorw %r14w, %r15w 4296 ; CHECK-SSE1-NEXT: movzwl 18(%rsi), %r14d 4297 ; CHECK-SSE1-NEXT: xorw %r11w, %r14w 4298 ; CHECK-SSE1-NEXT: movzwl 20(%rsi), %ebp 4299 ; CHECK-SSE1-NEXT: xorw %r13w, %bp 4300 ; CHECK-SSE1-NEXT: movzwl 22(%rsi), %ebx 4301 ; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bx # 2-byte Folded Reload 4302 ; CHECK-SSE1-NEXT: movzwl 24(%rsi), %r11d 4303 ; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r11w # 2-byte Folded Reload 4304 ; CHECK-SSE1-NEXT: movzwl 26(%rsi), %r10d 4305 ; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r10w # 2-byte Folded Reload 4306 ; CHECK-SSE1-NEXT: movzwl 28(%rsi), %r9d 4307 ; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r9w # 2-byte Folded Reload 4308 ; CHECK-SSE1-NEXT: movzwl 30(%rsi), %r13d 4309 ; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r13w # 2-byte Folded Reload 4310 ; CHECK-SSE1-NEXT: andw 30(%r8), %r13w 4311 ; CHECK-SSE1-NEXT: andw 28(%r8), %r9w 4312 ; CHECK-SSE1-NEXT: andw 26(%r8), %r10w 4313 ; CHECK-SSE1-NEXT: andw 24(%r8), %r11w 4314 ; CHECK-SSE1-NEXT: andw 22(%r8), %bx 4315 ; CHECK-SSE1-NEXT: andw 20(%r8), %bp 4316 ; CHECK-SSE1-NEXT: andw 18(%r8), %r14w 4317 ; CHECK-SSE1-NEXT: andw 16(%r8), %r15w 4318 ; CHECK-SSE1-NEXT: andw 14(%r8), %r12w 4319 ; CHECK-SSE1-NEXT: andw 12(%r8), %dx 4320 ; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4321 ; CHECK-SSE1-NEXT: andw 10(%r8), %cx 4322 ; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4323 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 4324 ; CHECK-SSE1-NEXT: andw 8(%r8), %dx 4325 ; CHECK-SSE1-NEXT: andw 6(%r8), %ax 4326 ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4327 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 4328 ; CHECK-SSE1-NEXT: andw 4(%r8), %cx 4329 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 4330 ; 
CHECK-SSE1-NEXT: andw 2(%r8), %ax 4331 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload 4332 ; CHECK-SSE1-NEXT: andw (%r8), %si 4333 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Folded Reload 4334 ; CHECK-SSE1-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4335 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload 4336 ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4337 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload 4338 ; CHECK-SSE1-NEXT: movl %ecx, %esi 4339 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 4340 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload 4341 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload 4342 ; CHECK-SSE1-NEXT: movl %edx, %r8d 4343 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 4344 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload 4345 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 4346 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload 4347 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 4-byte Folded Reload 4348 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Folded Reload 4349 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload 4350 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload 4351 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload 4352 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload 4353 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Folded Reload 4354 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Folded Reload 4355 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload 4356 ; CHECK-SSE1-NEXT: movw %r13w, 30(%rdi) 4357 ; 
CHECK-SSE1-NEXT: movw %r9w, 28(%rdi) 4358 ; CHECK-SSE1-NEXT: movw %r10w, 26(%rdi) 4359 ; CHECK-SSE1-NEXT: movw %r11w, 24(%rdi) 4360 ; CHECK-SSE1-NEXT: movw %bx, 22(%rdi) 4361 ; CHECK-SSE1-NEXT: movw %bp, 20(%rdi) 4362 ; CHECK-SSE1-NEXT: movw %r14w, 18(%rdi) 4363 ; CHECK-SSE1-NEXT: movw %r15w, 16(%rdi) 4364 ; CHECK-SSE1-NEXT: movw %r12w, 14(%rdi) 4365 ; CHECK-SSE1-NEXT: movw %ax, 12(%rdi) 4366 ; CHECK-SSE1-NEXT: movw %dx, 10(%rdi) 4367 ; CHECK-SSE1-NEXT: movw %r8w, 8(%rdi) 4368 ; CHECK-SSE1-NEXT: movw %cx, 6(%rdi) 4369 ; CHECK-SSE1-NEXT: movw %si, 4(%rdi) 4370 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 4371 ; CHECK-SSE1-NEXT: movw %ax, 2(%rdi) 4372 ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 4373 ; CHECK-SSE1-NEXT: movw %ax, (%rdi) 4374 ; CHECK-SSE1-NEXT: movq %rdi, %rax 4375 ; CHECK-SSE1-NEXT: popq %rbx 4376 ; CHECK-SSE1-NEXT: popq %r12 4377 ; CHECK-SSE1-NEXT: popq %r13 4378 ; CHECK-SSE1-NEXT: popq %r14 4379 ; CHECK-SSE1-NEXT: popq %r15 4380 ; CHECK-SSE1-NEXT: popq %rbp 4381 ; CHECK-SSE1-NEXT: retq 4382 ; 4383 ; CHECK-SSE2-LABEL: in_v16i16: 4384 ; CHECK-SSE2: # %bb.0: 4385 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 4386 ; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 4387 ; CHECK-SSE2-NEXT: movaps %xmm0, %xmm2 4388 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm2 4389 ; CHECK-SSE2-NEXT: andps (%rdi), %xmm0 4390 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 4391 ; CHECK-SSE2-NEXT: movaps %xmm1, %xmm2 4392 ; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm2 4393 ; CHECK-SSE2-NEXT: andps 16(%rdi), %xmm1 4394 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 4395 ; CHECK-SSE2-NEXT: retq 4396 ; 4397 ; CHECK-XOP-LABEL: in_v16i16: 4398 ; CHECK-XOP: # %bb.0: 4399 ; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 4400 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 4401 ; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 4402 ; CHECK-XOP-NEXT: retq 4403 %x = load <16 x i16>, <16 x i16> *%px, align 32 4404 %y = load <16 x i16>, <16 x i16> *%py, align 32 4405 %mask = load <16 x i16>, <16 
x i16> *%pmask, align 32 4406 %n0 = xor <16 x i16> %x, %y 4407 %n1 = and <16 x i16> %n0, %mask 4408 %r = xor <16 x i16> %n1, %y 4409 ret <16 x i16> %r 4410 } 4411 4412 define <8 x i32> @in_v8i32(<8 x i32> *%px, <8 x i32> *%py, <8 x i32> *%pmask) nounwind { 4413 ; CHECK-BASELINE-LABEL: in_v8i32: 4414 ; CHECK-BASELINE: # %bb.0: 4415 ; CHECK-BASELINE-NEXT: pushq %rbp 4416 ; CHECK-BASELINE-NEXT: pushq %r15 4417 ; CHECK-BASELINE-NEXT: pushq %r14 4418 ; CHECK-BASELINE-NEXT: pushq %r13 4419 ; CHECK-BASELINE-NEXT: pushq %r12 4420 ; CHECK-BASELINE-NEXT: pushq %rbx 4421 ; CHECK-BASELINE-NEXT: movl 28(%rdx), %r15d 4422 ; CHECK-BASELINE-NEXT: movl 24(%rdx), %r14d 4423 ; CHECK-BASELINE-NEXT: movl 20(%rdx), %r10d 4424 ; CHECK-BASELINE-NEXT: movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4425 ; CHECK-BASELINE-NEXT: movl 16(%rdx), %eax 4426 ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4427 ; CHECK-BASELINE-NEXT: movl 12(%rdx), %ebp 4428 ; CHECK-BASELINE-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4429 ; CHECK-BASELINE-NEXT: movl 8(%rdx), %ebx 4430 ; CHECK-BASELINE-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4431 ; CHECK-BASELINE-NEXT: movl (%rdx), %r12d 4432 ; CHECK-BASELINE-NEXT: movl 4(%rdx), %r13d 4433 ; CHECK-BASELINE-NEXT: movl (%rsi), %r11d 4434 ; CHECK-BASELINE-NEXT: xorl %r12d, %r11d 4435 ; CHECK-BASELINE-NEXT: movl 4(%rsi), %r9d 4436 ; CHECK-BASELINE-NEXT: xorl %r13d, %r9d 4437 ; CHECK-BASELINE-NEXT: movl 8(%rsi), %r8d 4438 ; CHECK-BASELINE-NEXT: xorl %ebx, %r8d 4439 ; CHECK-BASELINE-NEXT: movl 12(%rsi), %ebx 4440 ; CHECK-BASELINE-NEXT: xorl %ebp, %ebx 4441 ; CHECK-BASELINE-NEXT: movl 16(%rsi), %ebp 4442 ; CHECK-BASELINE-NEXT: xorl %eax, %ebp 4443 ; CHECK-BASELINE-NEXT: movl 20(%rsi), %edx 4444 ; CHECK-BASELINE-NEXT: xorl %r10d, %edx 4445 ; CHECK-BASELINE-NEXT: movl 24(%rsi), %eax 4446 ; CHECK-BASELINE-NEXT: xorl %r14d, %eax 4447 ; CHECK-BASELINE-NEXT: movl 28(%rsi), %esi 4448 ; CHECK-BASELINE-NEXT: xorl %r15d, 
%esi 4449 ; CHECK-BASELINE-NEXT: andl 28(%rcx), %esi 4450 ; CHECK-BASELINE-NEXT: andl 24(%rcx), %eax 4451 ; CHECK-BASELINE-NEXT: andl 20(%rcx), %edx 4452 ; CHECK-BASELINE-NEXT: andl 16(%rcx), %ebp 4453 ; CHECK-BASELINE-NEXT: andl 12(%rcx), %ebx 4454 ; CHECK-BASELINE-NEXT: andl 8(%rcx), %r8d 4455 ; CHECK-BASELINE-NEXT: andl 4(%rcx), %r9d 4456 ; CHECK-BASELINE-NEXT: andl (%rcx), %r11d 4457 ; CHECK-BASELINE-NEXT: xorl %r12d, %r11d 4458 ; CHECK-BASELINE-NEXT: xorl %r13d, %r9d 4459 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload 4460 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload 4461 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload 4462 ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload 4463 ; CHECK-BASELINE-NEXT: xorl %r14d, %eax 4464 ; CHECK-BASELINE-NEXT: xorl %r15d, %esi 4465 ; CHECK-BASELINE-NEXT: movl %esi, 28(%rdi) 4466 ; CHECK-BASELINE-NEXT: movl %eax, 24(%rdi) 4467 ; CHECK-BASELINE-NEXT: movl %edx, 20(%rdi) 4468 ; CHECK-BASELINE-NEXT: movl %ebp, 16(%rdi) 4469 ; CHECK-BASELINE-NEXT: movl %ebx, 12(%rdi) 4470 ; CHECK-BASELINE-NEXT: movl %r8d, 8(%rdi) 4471 ; CHECK-BASELINE-NEXT: movl %r9d, 4(%rdi) 4472 ; CHECK-BASELINE-NEXT: movl %r11d, (%rdi) 4473 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 4474 ; CHECK-BASELINE-NEXT: popq %rbx 4475 ; CHECK-BASELINE-NEXT: popq %r12 4476 ; CHECK-BASELINE-NEXT: popq %r13 4477 ; CHECK-BASELINE-NEXT: popq %r14 4478 ; CHECK-BASELINE-NEXT: popq %r15 4479 ; CHECK-BASELINE-NEXT: popq %rbp 4480 ; CHECK-BASELINE-NEXT: retq 4481 ; 4482 ; CHECK-SSE1-LABEL: in_v8i32: 4483 ; CHECK-SSE1: # %bb.0: 4484 ; CHECK-SSE1-NEXT: pushq %rbp 4485 ; CHECK-SSE1-NEXT: pushq %r15 4486 ; CHECK-SSE1-NEXT: pushq %r14 4487 ; CHECK-SSE1-NEXT: pushq %r13 4488 ; CHECK-SSE1-NEXT: pushq %r12 4489 ; CHECK-SSE1-NEXT: pushq %rbx 4490 ; CHECK-SSE1-NEXT: movl 28(%rdx), %r15d 4491 ; CHECK-SSE1-NEXT: movl 24(%rdx), %r14d 4492 ; CHECK-SSE1-NEXT: movl 
20(%rdx), %r10d 4493 ; CHECK-SSE1-NEXT: movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4494 ; CHECK-SSE1-NEXT: movl 16(%rdx), %eax 4495 ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4496 ; CHECK-SSE1-NEXT: movl 12(%rdx), %ebp 4497 ; CHECK-SSE1-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4498 ; CHECK-SSE1-NEXT: movl 8(%rdx), %ebx 4499 ; CHECK-SSE1-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4500 ; CHECK-SSE1-NEXT: movl (%rdx), %r12d 4501 ; CHECK-SSE1-NEXT: movl 4(%rdx), %r13d 4502 ; CHECK-SSE1-NEXT: movl (%rsi), %r11d 4503 ; CHECK-SSE1-NEXT: xorl %r12d, %r11d 4504 ; CHECK-SSE1-NEXT: movl 4(%rsi), %r9d 4505 ; CHECK-SSE1-NEXT: xorl %r13d, %r9d 4506 ; CHECK-SSE1-NEXT: movl 8(%rsi), %r8d 4507 ; CHECK-SSE1-NEXT: xorl %ebx, %r8d 4508 ; CHECK-SSE1-NEXT: movl 12(%rsi), %ebx 4509 ; CHECK-SSE1-NEXT: xorl %ebp, %ebx 4510 ; CHECK-SSE1-NEXT: movl 16(%rsi), %ebp 4511 ; CHECK-SSE1-NEXT: xorl %eax, %ebp 4512 ; CHECK-SSE1-NEXT: movl 20(%rsi), %edx 4513 ; CHECK-SSE1-NEXT: xorl %r10d, %edx 4514 ; CHECK-SSE1-NEXT: movl 24(%rsi), %eax 4515 ; CHECK-SSE1-NEXT: xorl %r14d, %eax 4516 ; CHECK-SSE1-NEXT: movl 28(%rsi), %esi 4517 ; CHECK-SSE1-NEXT: xorl %r15d, %esi 4518 ; CHECK-SSE1-NEXT: andl 28(%rcx), %esi 4519 ; CHECK-SSE1-NEXT: andl 24(%rcx), %eax 4520 ; CHECK-SSE1-NEXT: andl 20(%rcx), %edx 4521 ; CHECK-SSE1-NEXT: andl 16(%rcx), %ebp 4522 ; CHECK-SSE1-NEXT: andl 12(%rcx), %ebx 4523 ; CHECK-SSE1-NEXT: andl 8(%rcx), %r8d 4524 ; CHECK-SSE1-NEXT: andl 4(%rcx), %r9d 4525 ; CHECK-SSE1-NEXT: andl (%rcx), %r11d 4526 ; CHECK-SSE1-NEXT: xorl %r12d, %r11d 4527 ; CHECK-SSE1-NEXT: xorl %r13d, %r9d 4528 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload 4529 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload 4530 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload 4531 ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload 4532 ; CHECK-SSE1-NEXT: 
xorl %r14d, %eax 4533 ; CHECK-SSE1-NEXT: xorl %r15d, %esi 4534 ; CHECK-SSE1-NEXT: movl %esi, 28(%rdi) 4535 ; CHECK-SSE1-NEXT: movl %eax, 24(%rdi) 4536 ; CHECK-SSE1-NEXT: movl %edx, 20(%rdi) 4537 ; CHECK-SSE1-NEXT: movl %ebp, 16(%rdi) 4538 ; CHECK-SSE1-NEXT: movl %ebx, 12(%rdi) 4539 ; CHECK-SSE1-NEXT: movl %r8d, 8(%rdi) 4540 ; CHECK-SSE1-NEXT: movl %r9d, 4(%rdi) 4541 ; CHECK-SSE1-NEXT: movl %r11d, (%rdi) 4542 ; CHECK-SSE1-NEXT: movq %rdi, %rax 4543 ; CHECK-SSE1-NEXT: popq %rbx 4544 ; CHECK-SSE1-NEXT: popq %r12 4545 ; CHECK-SSE1-NEXT: popq %r13 4546 ; CHECK-SSE1-NEXT: popq %r14 4547 ; CHECK-SSE1-NEXT: popq %r15 4548 ; CHECK-SSE1-NEXT: popq %rbp 4549 ; CHECK-SSE1-NEXT: retq 4550 ; 4551 ; CHECK-SSE2-LABEL: in_v8i32: 4552 ; CHECK-SSE2: # %bb.0: 4553 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 4554 ; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 4555 ; CHECK-SSE2-NEXT: movaps %xmm0, %xmm2 4556 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm2 4557 ; CHECK-SSE2-NEXT: andps (%rdi), %xmm0 4558 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 4559 ; CHECK-SSE2-NEXT: movaps %xmm1, %xmm2 4560 ; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm2 4561 ; CHECK-SSE2-NEXT: andps 16(%rdi), %xmm1 4562 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 4563 ; CHECK-SSE2-NEXT: retq 4564 ; 4565 ; CHECK-XOP-LABEL: in_v8i32: 4566 ; CHECK-XOP: # %bb.0: 4567 ; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 4568 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 4569 ; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 4570 ; CHECK-XOP-NEXT: retq 4571 %x = load <8 x i32>, <8 x i32> *%px, align 32 4572 %y = load <8 x i32>, <8 x i32> *%py, align 32 4573 %mask = load <8 x i32>, <8 x i32> *%pmask, align 32 4574 %n0 = xor <8 x i32> %x, %y 4575 %n1 = and <8 x i32> %n0, %mask 4576 %r = xor <8 x i32> %n1, %y 4577 ret <8 x i32> %r 4578 } 4579 ; in_v4i64: loads x, y and mask as <4 x i64> from %px, %py and %pmask (align 32) and returns ((x ^ y) & mask) ^ y, so each result bit is taken from x where the mask bit is 1 and from y where it is 0. 4580 define <4 x i64> @in_v4i64(<4 x i64> *%px, <4 x i64> *%py, <4 x i64> *%pmask) nounwind { 4581 ; CHECK-BASELINE-LABEL: in_v4i64: 4582 ; CHECK-BASELINE: # %bb.0: 4583 ; CHECK-BASELINE-NEXT: pushq %rbx 4584 ; CHECK-BASELINE-NEXT: movq 
24(%rdx), %r8 4585 ; CHECK-BASELINE-NEXT: movq 16(%rdx), %r9 4586 ; CHECK-BASELINE-NEXT: movq (%rdx), %r11 4587 ; CHECK-BASELINE-NEXT: movq 8(%rdx), %r10 4588 ; CHECK-BASELINE-NEXT: movq (%rsi), %rdx 4589 ; CHECK-BASELINE-NEXT: xorq %r11, %rdx 4590 ; CHECK-BASELINE-NEXT: movq 8(%rsi), %rax 4591 ; CHECK-BASELINE-NEXT: xorq %r10, %rax 4592 ; CHECK-BASELINE-NEXT: movq 16(%rsi), %rbx 4593 ; CHECK-BASELINE-NEXT: xorq %r9, %rbx 4594 ; CHECK-BASELINE-NEXT: movq 24(%rsi), %rsi 4595 ; CHECK-BASELINE-NEXT: xorq %r8, %rsi 4596 ; CHECK-BASELINE-NEXT: andq 24(%rcx), %rsi 4597 ; CHECK-BASELINE-NEXT: andq 16(%rcx), %rbx 4598 ; CHECK-BASELINE-NEXT: andq 8(%rcx), %rax 4599 ; CHECK-BASELINE-NEXT: andq (%rcx), %rdx 4600 ; CHECK-BASELINE-NEXT: xorq %r11, %rdx 4601 ; CHECK-BASELINE-NEXT: xorq %r10, %rax 4602 ; CHECK-BASELINE-NEXT: xorq %r9, %rbx 4603 ; CHECK-BASELINE-NEXT: xorq %r8, %rsi 4604 ; CHECK-BASELINE-NEXT: movq %rsi, 24(%rdi) 4605 ; CHECK-BASELINE-NEXT: movq %rbx, 16(%rdi) 4606 ; CHECK-BASELINE-NEXT: movq %rax, 8(%rdi) 4607 ; CHECK-BASELINE-NEXT: movq %rdx, (%rdi) 4608 ; CHECK-BASELINE-NEXT: movq %rdi, %rax 4609 ; CHECK-BASELINE-NEXT: popq %rbx 4610 ; CHECK-BASELINE-NEXT: retq 4611 ; 4612 ; CHECK-SSE1-LABEL: in_v4i64: 4613 ; CHECK-SSE1: # %bb.0: 4614 ; CHECK-SSE1-NEXT: pushq %rbx 4615 ; CHECK-SSE1-NEXT: movq 24(%rdx), %r8 4616 ; CHECK-SSE1-NEXT: movq 16(%rdx), %r9 4617 ; CHECK-SSE1-NEXT: movq (%rdx), %r11 4618 ; CHECK-SSE1-NEXT: movq 8(%rdx), %r10 4619 ; CHECK-SSE1-NEXT: movq (%rsi), %rdx 4620 ; CHECK-SSE1-NEXT: xorq %r11, %rdx 4621 ; CHECK-SSE1-NEXT: movq 8(%rsi), %rax 4622 ; CHECK-SSE1-NEXT: xorq %r10, %rax 4623 ; CHECK-SSE1-NEXT: movq 16(%rsi), %rbx 4624 ; CHECK-SSE1-NEXT: xorq %r9, %rbx 4625 ; CHECK-SSE1-NEXT: movq 24(%rsi), %rsi 4626 ; CHECK-SSE1-NEXT: xorq %r8, %rsi 4627 ; CHECK-SSE1-NEXT: andq 24(%rcx), %rsi 4628 ; CHECK-SSE1-NEXT: andq 16(%rcx), %rbx 4629 ; CHECK-SSE1-NEXT: andq 8(%rcx), %rax 4630 ; CHECK-SSE1-NEXT: andq (%rcx), %rdx 4631 ; CHECK-SSE1-NEXT: xorq %r11, 
%rdx 4632 ; CHECK-SSE1-NEXT: xorq %r10, %rax 4633 ; CHECK-SSE1-NEXT: xorq %r9, %rbx 4634 ; CHECK-SSE1-NEXT: xorq %r8, %rsi 4635 ; CHECK-SSE1-NEXT: movq %rsi, 24(%rdi) 4636 ; CHECK-SSE1-NEXT: movq %rbx, 16(%rdi) 4637 ; CHECK-SSE1-NEXT: movq %rax, 8(%rdi) 4638 ; CHECK-SSE1-NEXT: movq %rdx, (%rdi) 4639 ; CHECK-SSE1-NEXT: movq %rdi, %rax 4640 ; CHECK-SSE1-NEXT: popq %rbx 4641 ; CHECK-SSE1-NEXT: retq 4642 ; 4643 ; CHECK-SSE2-LABEL: in_v4i64: 4644 ; CHECK-SSE2: # %bb.0: 4645 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 4646 ; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 4647 ; CHECK-SSE2-NEXT: movaps %xmm0, %xmm2 4648 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm2 4649 ; CHECK-SSE2-NEXT: andps (%rdi), %xmm0 4650 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 4651 ; CHECK-SSE2-NEXT: movaps %xmm1, %xmm2 4652 ; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm2 4653 ; CHECK-SSE2-NEXT: andps 16(%rdi), %xmm1 4654 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 4655 ; CHECK-SSE2-NEXT: retq 4656 ; 4657 ; CHECK-XOP-LABEL: in_v4i64: 4658 ; CHECK-XOP: # %bb.0: 4659 ; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 4660 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 4661 ; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 4662 ; CHECK-XOP-NEXT: retq 4663 %x = load <4 x i64>, <4 x i64> *%px, align 32 4664 %y = load <4 x i64>, <4 x i64> *%py, align 32 4665 %mask = load <4 x i64>, <4 x i64> *%pmask, align 32 4666 %n0 = xor <4 x i64> %x, %y ; bits in which x and y differ 4667 %n1 = and <4 x i64> %n0, %mask ; keep the differing bits only where mask is set 4668 %r = xor <4 x i64> %n1, %y ; flip y at those bits: yields x where mask is 1, y where mask is 0 4669 ret <4 x i64> %r 4670 } 4671