; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X64

define void @commute_m_pfadd(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfadd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfadd (%eax), %mm0
; X32-NEXT:    pfadd (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfadd:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfadd (%rsi), %mm0
; X64-NEXT:    pfadd (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx)

define void @commute_m_pfsub(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfsub:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfsub (%eax), %mm0
; X32-NEXT:    pfsubr (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfsub:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfsub (%rsi), %mm0
; X64-NEXT:    pfsubr (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx)

define void @commute_m_pfsubr(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfsubr:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfsubr (%eax), %mm0
; X32-NEXT:    pfsub (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfsubr:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfsubr (%rsi), %mm0
; X64-NEXT:    pfsub (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx)

define void @commute_m_pfmul(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfmul:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfmul (%eax), %mm0
; X32-NEXT:    pfmul (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfmul:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfmul (%rsi), %mm0
; X64-NEXT:    pfmul (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx)

; PFMAX can't commute without fast-math.
define void @commute_m_pfmax(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfmax:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    movq (%ecx), %mm1
; X32-NEXT:    pfmax (%eax), %mm0
; X32-NEXT:    pfmax %mm0, %mm1
; X32-NEXT:    movq %mm1, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfmax:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movq (%rdx), %mm1
; X64-NEXT:    pfmax (%rsi), %mm0
; X64-NEXT:    pfmax %mm0, %mm1
; X64-NEXT:    movq %mm1, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx)

; PFMIN can't commute without fast-math.
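; (Note on why PFMIN/PFMAX differ from the cases above: in general, swapping
; the operands of a floating-point min/max is not value-preserving, e.g. for
; zeros of opposite sign the result can depend on operand order. So without
; fast-math the backend must keep the original operand order, loading the
; accumulator with an extra MOVQ and using a register-register min/max,
; instead of folding the second load by commuting the intrinsic as the
; PFADD/PFMUL tests do.)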
define void @commute_m_pfmin(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfmin:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    movq (%ecx), %mm1
; X32-NEXT:    pfmin (%eax), %mm0
; X32-NEXT:    pfmin %mm0, %mm1
; X32-NEXT:    movq %mm1, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfmin:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movq (%rdx), %mm1
; X64-NEXT:    pfmin (%rsi), %mm0
; X64-NEXT:    pfmin %mm0, %mm1
; X64-NEXT:    movq %mm1, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx)

define void @commute_m_pfcmpeq(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfcmpeq:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfcmpeq (%eax), %mm0
; X32-NEXT:    pfcmpeq (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfcmpeq:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfcmpeq (%rsi), %mm0
; X64-NEXT:    pfcmpeq (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx)

define void @commute_m_pavgusb(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pavgusb:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pavgusb (%eax), %mm0
; X32-NEXT:    pavgusb (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pavgusb:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pavgusb (%rsi), %mm0
; X64-NEXT:    pavgusb (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx)

define void @commute_m_pmulhrw(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pmulhrw:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pmulhrw (%eax), %mm0
; X32-NEXT:    pmulhrw (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pmulhrw:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pmulhrw (%rsi), %mm0
; X64-NEXT:    pmulhrw (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx)