; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+avx2 < %s | FileCheck %s --check-prefix=AVX

;
; Float Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted: those
; predicates are symmetric, so the operands can be swapped to fold the
; load into the compare. lt/le are not symmetric (swapping the operands
; turns lt into gt, which the SSE compare immediate cannot encode), so
; the load stays in a separate instruction.
;

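; A minimal sketch of the identities assumed above (%x and %y are
; placeholder values, not part of the tests below):
;   fcmp oeq <4 x float> %x, %y  <=>  fcmp oeq <4 x float> %y, %x
;   fcmp olt <4 x float> %x, %y  <=>  fcmp ogt <4 x float> %y, %x  (predicate flips)
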
define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_eq
  ;SSE:       cmpeqps (%rdi), %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmpps_eq
  ;AVX:       vcmpeqps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp oeq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_ne
  ;SSE:       cmpneqps (%rdi), %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmpps_ne
  ;AVX:       vcmpneqps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp une <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_ord
  ;SSE:       cmpordps (%rdi), %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmpps_ord
  ;AVX:       vcmpordps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ord <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_uno
  ;SSE:       cmpunordps (%rdi), %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmpps_uno
  ;AVX:       vcmpunordps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp uno <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_lt
  ;SSE:       movaps (%rdi), %xmm1
  ;SSE-NEXT:  cmpltps %xmm0, %xmm1
  ;SSE-NEXT:  movaps %xmm1, %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmpps_lt
  ;AVX:       vmovaps (%rdi), %xmm1
  ;AVX-NEXT:  vcmpltps %xmm0, %xmm1, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp olt <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_le
  ;SSE:       movaps (%rdi), %xmm1
  ;SSE-NEXT:  cmpleps %xmm0, %xmm1
  ;SSE-NEXT:  movaps %xmm1, %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmpps_le
  ;AVX:       vmovaps (%rdi), %xmm1
  ;AVX-NEXT:  vcmpleps %xmm0, %xmm1, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ole <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_eq_ymm
  ;AVX:       vcmpeqps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp oeq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_ne_ymm
  ;AVX:       vcmpneqps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp une <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_ord_ymm
  ;AVX:       vcmpordps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ord <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_uno_ymm
  ;AVX:       vcmpunordps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp uno <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_lt_ymm
  ;AVX:       vmovaps (%rdi), %ymm1
  ;AVX-NEXT:  vcmpltps %ymm0, %ymm1, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp olt <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_le_ymm
  ;AVX:       vmovaps (%rdi), %ymm1
  ;AVX-NEXT:  vcmpleps %ymm0, %ymm1, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ole <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

;
; Double Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted, for the
; same symmetry reasons as the float comparisons above.
;
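
; As above, a sketch of the assumed identity for packed doubles
; (placeholder values %x, %y):
;   fcmp oeq <2 x double> %x, %y  <=>  fcmp oeq <2 x double> %y, %x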

define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_eq
  ;SSE:       cmpeqpd (%rdi), %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmppd_eq
  ;AVX:       vcmpeqpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp oeq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_ne
  ;SSE:       cmpneqpd (%rdi), %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmppd_ne
  ;AVX:       vcmpneqpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp une <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_ord
  ;SSE:       cmpordpd (%rdi), %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmppd_ord
  ;AVX:       vcmpordpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ord <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_uno
  ;SSE:       cmpunordpd (%rdi), %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmppd_uno
  ;AVX:       vcmpunordpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp uno <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_lt
  ;SSE:       movapd (%rdi), %xmm1
  ;SSE-NEXT:  cmpltpd %xmm0, %xmm1
  ;SSE-NEXT:  movapd %xmm1, %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmppd_lt
  ;AVX:       vmovapd (%rdi), %xmm1
  ;AVX-NEXT:  vcmpltpd %xmm0, %xmm1, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp olt <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_le
  ;SSE:       movapd (%rdi), %xmm1
  ;SSE-NEXT:  cmplepd %xmm0, %xmm1
  ;SSE-NEXT:  movapd %xmm1, %xmm0
  ;SSE-NEXT:  retq

  ;AVX-LABEL: commute_cmppd_le
  ;AVX:       vmovapd (%rdi), %xmm1
  ;AVX-NEXT:  vcmplepd %xmm0, %xmm1, %xmm0
  ;AVX-NEXT:  retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ole <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @commute_cmppd_eq_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_eq_ymm
  ;AVX:       vcmpeqpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp oeq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ne_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_ne_ymm
  ;AVX:       vcmpneqpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp une <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ord_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_ord_ymm
  ;AVX:       vcmpordpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ord <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_uno_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_uno_ymm
  ;AVX:       vcmpunordpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp uno <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_lt_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_lt_ymm
  ;AVX:       vmovapd (%rdi), %ymm1
  ;AVX-NEXT:  vcmpltpd %ymm0, %ymm1, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp olt <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_le_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_le_ymm
  ;AVX:       vmovapd (%rdi), %ymm1
  ;AVX-NEXT:  vcmplepd %ymm0, %ymm1, %ymm0
  ;AVX-NEXT:  retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ole <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}