; Test: memory-operand folding via commutation for 3DNow! two-operand intrinsics
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X32
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X64
      4 
; PFADD is commutative, so the second call (whose loaded operand appears on
; the LHS) can still fold its memory operand: both adds below use a plain
; pfadd with a memory source instead of an extra movq load.
define void @commute_m_pfadd(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfadd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfadd (%eax), %mm0
; X32-NEXT:    pfadd (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfadd:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfadd (%rsi), %mm0
; X64-NEXT:    pfadd (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  ; %4 folds %2 directly; %5 puts the load (%3) on the LHS so folding it
  ; requires the backend to commute the operands.
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx)
     33 
; PFSUB is not commutative, but it has a reversed form: commuting the second
; call's operands turns pfsub into pfsubr (see the second CHECK instruction),
; which still lets the memory operand fold.
define void @commute_m_pfsub(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfsub:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfsub (%eax), %mm0
; X32-NEXT:    pfsubr (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfsub:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfsub (%rsi), %mm0
; X64-NEXT:    pfsubr (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  ; %5 has the load (%3) on the LHS; folding it requires switching to pfsubr.
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx)
     62 
; Mirror of the pfsub case: commuting the second pfsubr's operands turns it
; into a plain pfsub (see the second CHECK instruction), keeping the memory
; operand folded.
define void @commute_m_pfsubr(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfsubr:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfsubr (%eax), %mm0
; X32-NEXT:    pfsub (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfsubr:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfsubr (%rsi), %mm0
; X64-NEXT:    pfsub (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  ; %5 has the load (%3) on the LHS; folding it requires switching to pfsub.
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx)
     91 
; PFMUL is commutative: both multiplies fold their memory operand even though
; the second call's loaded value (%3) is on the LHS.
define void @commute_m_pfmul(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfmul:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfmul (%eax), %mm0
; X32-NEXT:    pfmul (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfmul:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfmul (%rsi), %mm0
; X64-NEXT:    pfmul (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  ; %5 puts the load (%3) on the LHS to exercise operand commutation.
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx)
    120 
; PFMAX can't commute without fast-math.
; Negative test: the second pfmax keeps its original operand order, so the
; loaded value needs its own register (the extra movq into %mm1) and the op
; stays register-register instead of folding the memory operand.
define void @commute_m_pfmax(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfmax:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    movq (%ecx), %mm1
; X32-NEXT:    pfmax (%eax), %mm0
; X32-NEXT:    pfmax %mm0, %mm1
; X32-NEXT:    movq %mm1, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfmax:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movq (%rdx), %mm1
; X64-NEXT:    pfmax (%rsi), %mm0
; X64-NEXT:    pfmax %mm0, %mm1
; X64-NEXT:    movq %mm1, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  ; %5 puts the load (%3) on the LHS; without commutation it cannot fold.
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx)
    152 
; PFMIN can't commute without fast-math.
; Negative test, same shape as pfmax above: the second pfmin stays
; register-register (extra movq into %mm1) rather than folding its memory
; operand via commutation.
define void @commute_m_pfmin(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfmin:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    movq (%ecx), %mm1
; X32-NEXT:    pfmin (%eax), %mm0
; X32-NEXT:    pfmin %mm0, %mm1
; X32-NEXT:    movq %mm1, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfmin:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movq (%rdx), %mm1
; X64-NEXT:    pfmin (%rsi), %mm0
; X64-NEXT:    pfmin %mm0, %mm1
; X64-NEXT:    movq %mm1, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  ; %5 puts the load (%3) on the LHS; without commutation it cannot fold.
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx)
    184 
; PFCMPEQ is commutative (equality compare): both compares fold their memory
; operand even though the second call's loaded value (%3) is on the LHS.
define void @commute_m_pfcmpeq(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfcmpeq:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfcmpeq (%eax), %mm0
; X32-NEXT:    pfcmpeq (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfcmpeq:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfcmpeq (%rsi), %mm0
; X64-NEXT:    pfcmpeq (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  ; %5 puts the load (%3) on the LHS to exercise operand commutation.
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx)
    213 
; PAVGUSB is commutative: both averages fold their memory operand even though
; the second call's loaded value (%3) is on the LHS.
define void @commute_m_pavgusb(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pavgusb:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pavgusb (%eax), %mm0
; X32-NEXT:    pavgusb (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pavgusb:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pavgusb (%rsi), %mm0
; X64-NEXT:    pavgusb (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  ; %5 puts the load (%3) on the LHS to exercise operand commutation.
  %4 = tail call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx)
    242 
; PMULHRW is commutative: both multiplies fold their memory operand even
; though the second call's loaded value (%3) is on the LHS.
define void @commute_m_pmulhrw(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pmulhrw:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pmulhrw (%eax), %mm0
; X32-NEXT:    pmulhrw (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pmulhrw:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pmulhrw (%rsi), %mm0
; X64-NEXT:    pmulhrw (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  ; %5 puts the load (%3) on the LHS to exercise operand commutation.
  %4 = tail call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx)
    271