; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+avx2 < %s | FileCheck %s --check-prefix=AVX

;
; Float Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;

define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_eq:
; SSE:       # BB#0:
; SSE-NEXT:    cmpeqps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_eq:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp oeq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ne:
; SSE:       # BB#0:
; SSE-NEXT:    cmpneqps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ne:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpneqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp une <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ord:
; SSE:       # BB#0:
; SSE-NEXT:    cmpordps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ord:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpordps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ord <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_uno:
; SSE:       # BB#0:
; SSE-NEXT:    cmpunordps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_uno:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpunordps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp uno <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}
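
; The oeq/une/ord/uno predicates are symmetric in their operands, so the
; compare can be commuted and the load folded directly into the instruction
; (the (%rdi) memory operand in the checks above).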

define <4 x i32> @commute_cmpps_ueq(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ueq:
; SSE:       # BB#0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    cmpeqps %xmm0, %xmm2
; SSE-NEXT:    cmpunordps %xmm1, %xmm0
; SSE-NEXT:    orps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ueq:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpeqps %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vorps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ueq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_one(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_one:
; SSE:       # BB#0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    cmpneqps %xmm0, %xmm2
; SSE-NEXT:    cmpordps %xmm1, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_one:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpneqps %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpordps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp one <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}
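
; There is no single legacy cmpps predicate for ueq/one (the SSE imm8 only
; encodes eq/lt/le/unord/neq/nlt/nle/ord), so both runs expand them into
; eq|unord and neq&ord pairs; the loaded value is kept in a register so it
; can feed both compares.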

define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_lt:
; SSE:       # BB#0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    cmpltps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_lt:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp olt <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_le:
; SSE:       # BB#0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    cmpleps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_le:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpleps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ole <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}
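
; olt/ole are not symmetric: commuting them would need a gt/ge predicate,
; which the SSE cmpps imm8 does not provide, so both runs keep the load in a
; register and preserve the original operand order.
;
; The *_ymm tests below repeat the predicates on 256-bit vectors; with only
; SSE available they are legalized as two 128-bit compares, one per half.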

define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_eq_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    cmpeqps (%rdi), %xmm0
; SSE-NEXT:    cmpeqps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_eq_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp oeq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ne_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    cmpneqps (%rdi), %xmm0
; SSE-NEXT:    cmpneqps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ne_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpneqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp une <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ord_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    cmpordps (%rdi), %xmm0
; SSE-NEXT:    cmpordps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ord_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpordps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ord <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_uno_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    cmpunordps (%rdi), %xmm0
; SSE-NEXT:    cmpunordps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_uno_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpunordps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp uno <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ueq_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ueq_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm4
; SSE-NEXT:    cmpeqps %xmm0, %xmm4
; SSE-NEXT:    cmpunordps %xmm2, %xmm0
; SSE-NEXT:    orps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    cmpeqps %xmm1, %xmm2
; SSE-NEXT:    cmpunordps %xmm3, %xmm1
; SSE-NEXT:    orps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ueq_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpeqps %ymm0, %ymm1, %ymm2
; AVX-NEXT:    vcmpunordps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vorps %ymm2, %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ueq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_one_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_one_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm4
; SSE-NEXT:    cmpneqps %xmm0, %xmm4
; SSE-NEXT:    cmpordps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    cmpneqps %xmm1, %xmm2
; SSE-NEXT:    cmpordps %xmm3, %xmm1
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_one_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpneqps %ymm0, %ymm1, %ymm2
; AVX-NEXT:    vcmpordps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp one <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_lt_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    cmpltps %xmm0, %xmm2
; SSE-NEXT:    cmpltps %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_lt_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp olt <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_le_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    cmpleps %xmm0, %xmm2
; SSE-NEXT:    cmpleps %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_le_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpleps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ole <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

;
; Double Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;
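; As with the float tests above, eq/neq/ord/unord fold the load into the
; compare, ueq/one are expanded into two compares, and lt/le keep the loaded
; value in a register.
;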

define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_eq:
; SSE:       # BB#0:
; SSE-NEXT:    cmpeqpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_eq:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp oeq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ne:
; SSE:       # BB#0:
; SSE-NEXT:    cmpneqpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ne:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpneqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp une <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ord:
; SSE:       # BB#0:
; SSE-NEXT:    cmpordpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ord:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ord <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ueq(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ueq:
; SSE:       # BB#0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    cmpeqpd %xmm0, %xmm2
; SSE-NEXT:    cmpunordpd %xmm1, %xmm0
; SSE-NEXT:    orpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ueq:
; AVX:       # BB#0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vorpd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ueq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_one(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_one:
; SSE:       # BB#0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    cmpneqpd %xmm0, %xmm2
; SSE-NEXT:    cmpordpd %xmm1, %xmm0
; SSE-NEXT:    andpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_one:
; AVX:       # BB#0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmpneqpd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpordpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp one <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_uno:
; SSE:       # BB#0:
; SSE-NEXT:    cmpunordpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_uno:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpunordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp uno <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_lt:
; SSE:       # BB#0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    cmpltpd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_lt:
; AVX:       # BB#0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp olt <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_le:
; SSE:       # BB#0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    cmplepd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_le:
; AVX:       # BB#0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ole <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @commute_cmppd_eq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_eq_ymmm:
; SSE:       # BB#0:
; SSE-NEXT:    cmpeqpd (%rdi), %xmm0
; SSE-NEXT:    cmpeqpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_eq_ymmm:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp oeq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ne_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ne_ymmm:
; SSE:       # BB#0:
; SSE-NEXT:    cmpneqpd (%rdi), %xmm0
; SSE-NEXT:    cmpneqpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ne_ymmm:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpneqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp une <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ord_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ord_ymmm:
; SSE:       # BB#0:
; SSE-NEXT:    cmpordpd (%rdi), %xmm0
; SSE-NEXT:    cmpordpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ord_ymmm:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ord <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_uno_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_uno_ymmm:
; SSE:       # BB#0:
; SSE-NEXT:    cmpunordpd (%rdi), %xmm0
; SSE-NEXT:    cmpunordpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_uno_ymmm:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpunordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp uno <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ueq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ueq_ymmm:
; SSE:       # BB#0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    cmpeqpd %xmm0, %xmm4
; SSE-NEXT:    cmpunordpd %xmm2, %xmm0
; SSE-NEXT:    orpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm2
; SSE-NEXT:    cmpeqpd %xmm1, %xmm2
; SSE-NEXT:    cmpunordpd %xmm3, %xmm1
; SSE-NEXT:    orpd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ueq_ymmm:
; AVX:       # BB#0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmpeqpd %ymm0, %ymm1, %ymm2
; AVX-NEXT:    vcmpunordpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vorpd %ymm2, %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ueq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_one_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_one_ymmm:
; SSE:       # BB#0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    cmpneqpd %xmm0, %xmm4
; SSE-NEXT:    cmpordpd %xmm2, %xmm0
; SSE-NEXT:    andpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm2
; SSE-NEXT:    cmpneqpd %xmm1, %xmm2
; SSE-NEXT:    cmpordpd %xmm3, %xmm1
; SSE-NEXT:    andpd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_one_ymmm:
; AVX:       # BB#0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmpneqpd %ymm0, %ymm1, %ymm2
; AVX-NEXT:    vcmpordpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vandpd %ymm2, %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp one <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_lt_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_lt_ymmm:
; SSE:       # BB#0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    cmpltpd %xmm0, %xmm2
; SSE-NEXT:    cmpltpd %xmm1, %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_lt_ymmm:
; AVX:       # BB#0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp olt <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_le_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_le_ymmm:
; SSE:       # BB#0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    cmplepd %xmm0, %xmm2
; SSE-NEXT:    cmplepd %xmm1, %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_le_ymmm:
; AVX:       # BB#0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ole <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}