; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 -disable-peephole | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 -disable-peephole | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl -disable-peephole | FileCheck %s --check-prefix=AVX512

;
; Float Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;

define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_eq:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_eq:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_eq:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp oeq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ne:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ne:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ne:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp une <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ord:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ord:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ord:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ord <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_uno:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_uno:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_uno:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp uno <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ueq(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ueq:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    cmpeqps %xmm0, %xmm2
; SSE-NEXT:    cmpunordps %xmm1, %xmm0
; SSE-NEXT:    orps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ueq:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeq_uqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ueq:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeq_uqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ueq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_one(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_one:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    cmpneqps %xmm0, %xmm2
; SSE-NEXT:    cmpordps %xmm1, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_one:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneq_oqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_one:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneq_oqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp one <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_lt:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    cmpltps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_lt:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_lt:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %xmm1
; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp olt <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_le:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    cmpleps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_le:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpleps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_le:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %xmm1
; AVX512-NEXT:    vcmpleps %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ole <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_eq_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqps (%rdi), %xmm0
; SSE-NEXT:    cmpeqps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_eq_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_eq_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp oeq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ne_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqps (%rdi), %xmm0
; SSE-NEXT:    cmpneqps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ne_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ne_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp une <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ord_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordps (%rdi), %xmm0
; SSE-NEXT:    cmpordps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ord_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ord_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ord <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_uno_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordps (%rdi), %xmm0
; SSE-NEXT:    cmpunordps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_uno_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_uno_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp uno <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ueq_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ueq_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm4
; SSE-NEXT:    cmpeqps %xmm0, %xmm4
; SSE-NEXT:    cmpunordps %xmm2, %xmm0
; SSE-NEXT:    orps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    cmpeqps %xmm1, %xmm2
; SSE-NEXT:    cmpunordps %xmm3, %xmm1
; SSE-NEXT:    orps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ueq_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeq_uqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ueq_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeq_uqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ueq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_one_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_one_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm4
; SSE-NEXT:    cmpneqps %xmm0, %xmm4
; SSE-NEXT:    cmpordps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    cmpneqps %xmm1, %xmm2
; SSE-NEXT:    cmpordps %xmm3, %xmm1
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_one_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneq_oqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_one_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneq_oqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp one <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_lt_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    cmpltps %xmm0, %xmm2
; SSE-NEXT:    cmpltps %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_lt_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_lt_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %ymm1
; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp olt <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_le_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    cmpleps %xmm0, %xmm2
; SSE-NEXT:    cmpleps %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_le_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpleps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_le_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %ymm1
; AVX512-NEXT:    vcmpleps %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ole <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

;
; Double Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;

define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_eq:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_eq:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_eq:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp oeq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ne:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ne:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ne:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp une <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ord:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ord:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ord:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ord <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ueq(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ueq:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    cmpeqpd %xmm0, %xmm2
; SSE-NEXT:    cmpunordpd %xmm1, %xmm0
; SSE-NEXT:    orpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ueq:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeq_uqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ueq:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeq_uqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ueq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_one(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_one:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    cmpneqpd %xmm0, %xmm2
; SSE-NEXT:    cmpordpd %xmm1, %xmm0
; SSE-NEXT:    andpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_one:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneq_oqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_one:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneq_oqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp one <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_uno:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_uno:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_uno:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp uno <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_lt:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    cmpltpd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_lt:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_lt:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %xmm1
; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp olt <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_le:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    cmplepd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_le:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_le:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %xmm1
; AVX512-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ole <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @commute_cmppd_eq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_eq_ymmm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqpd (%rdi), %xmm0
; SSE-NEXT:    cmpeqpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_eq_ymmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_eq_ymmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp oeq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ne_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ne_ymmm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqpd (%rdi), %xmm0
; SSE-NEXT:    cmpneqpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ne_ymmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ne_ymmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp une <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ord_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ord_ymmm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordpd (%rdi), %xmm0
; SSE-NEXT:    cmpordpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ord_ymmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ord_ymmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ord <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_uno_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_uno_ymmm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordpd (%rdi), %xmm0
; SSE-NEXT:    cmpunordpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_uno_ymmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_uno_ymmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp uno <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ueq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ueq_ymmm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    cmpeqpd %xmm0, %xmm4
; SSE-NEXT:    cmpunordpd %xmm2, %xmm0
; SSE-NEXT:    orpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm2
; SSE-NEXT:    cmpeqpd %xmm1, %xmm2
; SSE-NEXT:    cmpunordpd %xmm3, %xmm1
; SSE-NEXT:    orpd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ueq_ymmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeq_uqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ueq_ymmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeq_uqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ueq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_one_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_one_ymmm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    cmpneqpd %xmm0, %xmm4
; SSE-NEXT:    cmpordpd %xmm2, %xmm0
; SSE-NEXT:    andpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm2
; SSE-NEXT:    cmpneqpd %xmm1, %xmm2
; SSE-NEXT:    cmpordpd %xmm3, %xmm1
; SSE-NEXT:    andpd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_one_ymmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneq_oqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_one_ymmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneq_oqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp one <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_lt_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_lt_ymmm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    cmpltpd %xmm0, %xmm2
; SSE-NEXT:    cmpltpd %xmm1, %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_lt_ymmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_lt_ymmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %ymm1
; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp olt <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_le_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_le_ymmm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    cmplepd %xmm0, %xmm2
; SSE-NEXT:    cmplepd %xmm1, %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_le_ymmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_le_ymmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %ymm1
; AVX512-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ole <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}