; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+avx2 < %s | FileCheck %s --check-prefix=AVX

;
; Float Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;

define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_eq
  ;SSE: cmpeqps (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_eq
  ;AVX: vcmpeqps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp oeq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_ne
  ;SSE: cmpneqps (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_ne
  ;AVX: vcmpneqps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp une <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_ord
  ;SSE: cmpordps (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_ord
  ;AVX: vcmpordps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ord <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_uno
  ;SSE: cmpunordps (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_uno
  ;AVX: vcmpunordps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp uno <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}
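
; The eq/ne/ord/uno predicates above are symmetric, so the operands can be
; swapped and the load folded straight into the compare's memory operand.
; lt/le are not symmetric (swapping their operands would require a gt/ge
; predicate), so the lt/le tests below expect the loaded value to be moved
; into a register and compared in the original operand order instead.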

define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_lt
  ;SSE: movaps (%rdi), %xmm1
  ;SSE-NEXT: cmpltps %xmm0, %xmm1
  ;SSE-NEXT: movaps %xmm1, %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_lt
  ;AVX: vmovaps (%rdi), %xmm1
  ;AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp olt <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_le
  ;SSE: movaps (%rdi), %xmm1
  ;SSE-NEXT: cmpleps %xmm0, %xmm1
  ;SSE-NEXT: movaps %xmm1, %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_le
  ;AVX: vmovaps (%rdi), %xmm1
  ;AVX-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ole <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_eq_ymm
  ;AVX: vcmpeqps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp oeq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_ne_ymm
  ;AVX: vcmpneqps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp une <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_ord_ymm
  ;AVX: vcmpordps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ord <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_uno_ymm
  ;AVX: vcmpunordps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp uno <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_lt_ymm
  ;AVX: vmovaps (%rdi), %ymm1
  ;AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp olt <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_le_ymm
  ;AVX: vmovaps (%rdi), %ymm1
  ;AVX-NEXT: vcmpleps %ymm0, %ymm1, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ole <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

;
; Double Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;

define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_eq
  ;SSE: cmpeqpd (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_eq
  ;AVX: vcmpeqpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp oeq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_ne
  ;SSE: cmpneqpd (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_ne
  ;AVX: vcmpneqpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp une <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_ord
  ;SSE: cmpordpd (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_ord
  ;AVX: vcmpordpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ord <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_uno
  ;SSE: cmpunordpd (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_uno
  ;AVX: vcmpunordpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp uno <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}
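
; As with the single-precision tests, only the symmetric predicates commute;
; the lt/le tests below expect a movapd/vmovapd load followed by a compare
; in the original operand order.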

define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_lt
  ;SSE: movapd (%rdi), %xmm1
  ;SSE-NEXT: cmpltpd %xmm0, %xmm1
  ;SSE-NEXT: movapd %xmm1, %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_lt
  ;AVX: vmovapd (%rdi), %xmm1
  ;AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp olt <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_le
  ;SSE: movapd (%rdi), %xmm1
  ;SSE-NEXT: cmplepd %xmm0, %xmm1
  ;SSE-NEXT: movapd %xmm1, %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_le
  ;AVX: vmovapd (%rdi), %xmm1
  ;AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ole <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @commute_cmppd_eq_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_eq_ymm
  ;AVX: vcmpeqpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp oeq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ne_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_ne_ymm
  ;AVX: vcmpneqpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp une <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ord_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_ord_ymm
  ;AVX: vcmpordpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ord <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_uno_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_uno_ymm
  ;AVX: vcmpunordpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp uno <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_lt_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_lt_ymm
  ;AVX: vmovapd (%rdi), %ymm1
  ;AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp olt <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_le_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_le_ymm
  ;AVX: vmovapd (%rdi), %ymm1
  ;AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ole <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}