; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+xop < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP

; NOTE(review): the CHECK lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py instead of editing them by hand.
; NOTE(review): in the CHECK-SSE1 run (-sse2) the <4 x i32> result appears to
; be returned indirectly: the checks store the result to (%rdi) and return
; %rdi in %rax, and the pointer arguments shift to %rsi/%rdx/%rcx — confirm
; against the SysV x86-64 lowering before hand-editing SSE1 checks.

; ============================================================================ ;
; Various cases with %x and/or %y being a constant
; ============================================================================ ;

define <4 x i32> @out_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rdi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rdi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

define <4 x i32> @in_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    andnps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    xorps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdi), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pandn (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpandn (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @out_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    andnps (%rdi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT:    vandnps (%rdi), %xmm0, %xmm1
; CHECK-XOP-NEXT:    vorps %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @in_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm2
; CHECK-SSE1-NEXT:    xorps %xmm1, %xmm2
; CHECK-SSE1-NEXT:    andnps %xmm2, %xmm0
; CHECK-SSE1-NEXT:    xorps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdi), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm2
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pandn %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor (%rdx), %xmm1, %xmm2
; CHECK-XOP-NEXT:    vpandn %xmm2, %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}

define <4 x i32> @out_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_42:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

define <4 x i32> @in_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_42:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @out_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_42_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @in_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_42_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

define <4 x i32> @out_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT:    vandnps (%rsi), %xmm0, %xmm1
; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

define <4 x i32> @in_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT:    vandnps (%rsi), %xmm0, %xmm1
; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rsi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rsi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

define <4 x i32> @out_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [5.885454e-44,5.885454e-44,5.885454e-44,5.885454e-44]
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_42_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

define <4 x i32> @in_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_42_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @out_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    andps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_42_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @in_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_42_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}