; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2,-sse | FileCheck %s --check-prefix=X64
; Checks that vector FP ops scalarize to x87 and vector integer ops scalarize
; to GPR code when SSE is unavailable (i686 default / x86_64 with SSE disabled).

define void @fadd_2f64_mem(<2 x double>* %p0, <2 x double>* %p1, <2 x double>* %p2) nounwind {
; X32-LABEL: fadd_2f64_mem:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    fldl 8(%edx)
; X32-NEXT:    fldl (%edx)
; X32-NEXT:    faddl (%ecx)
; X32-NEXT:    fxch %st(1)
; X32-NEXT:    faddl 8(%ecx)
; X32-NEXT:    fstpl 8(%eax)
; X32-NEXT:    fstpl (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: fadd_2f64_mem:
; X64:       # %bb.0:
; X64-NEXT:    fldl 8(%rdi)
; X64-NEXT:    fldl (%rdi)
; X64-NEXT:    faddl (%rsi)
; X64-NEXT:    fxch %st(1)
; X64-NEXT:    faddl 8(%rsi)
; X64-NEXT:    fstpl 8(%rdx)
; X64-NEXT:    fstpl (%rdx)
; X64-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %p0
  %2 = load <2 x double>, <2 x double>* %p1
  %3 = fadd <2 x double> %1, %2
  store <2 x double> %3, <2 x double>* %p2
  ret void
}

define void @fadd_4f32_mem(<4 x float>* %p0, <4 x float>* %p1, <4 x float>* %p2) nounwind {
; X32-LABEL: fadd_4f32_mem:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    flds 12(%edx)
; X32-NEXT:    flds 8(%edx)
; X32-NEXT:    flds 4(%edx)
; X32-NEXT:    flds (%edx)
; X32-NEXT:    fadds (%ecx)
; X32-NEXT:    fxch %st(1)
; X32-NEXT:    fadds 4(%ecx)
; X32-NEXT:    fxch %st(2)
; X32-NEXT:    fadds 8(%ecx)
; X32-NEXT:    fxch %st(3)
; X32-NEXT:    fadds 12(%ecx)
; X32-NEXT:    fstps 12(%eax)
; X32-NEXT:    fxch %st(2)
; X32-NEXT:    fstps 8(%eax)
; X32-NEXT:    fstps 4(%eax)
; X32-NEXT:    fstps (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: fadd_4f32_mem:
; X64:       # %bb.0:
; X64-NEXT:    flds 12(%rdi)
; X64-NEXT:    flds 8(%rdi)
; X64-NEXT:    flds 4(%rdi)
; X64-NEXT:    flds (%rdi)
; X64-NEXT:    fadds (%rsi)
; X64-NEXT:    fxch %st(1)
; X64-NEXT:    fadds 4(%rsi)
; X64-NEXT:    fxch %st(2)
; X64-NEXT:    fadds 8(%rsi)
; X64-NEXT:    fxch %st(3)
; X64-NEXT:    fadds 12(%rsi)
; X64-NEXT:    fstps 12(%rdx)
; X64-NEXT:    fxch %st(2)
; X64-NEXT:    fstps 8(%rdx)
; X64-NEXT:    fstps 4(%rdx)
; X64-NEXT:    fstps (%rdx)
; X64-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %p0
  %2 = load <4 x float>, <4 x float>* %p1
  %3 = fadd <4 x float> %1, %2
  store <4 x float> %3, <4 x float>* %p2
  ret void
}

define void @fdiv_4f32_mem(<4 x float>* %p0, <4 x float>* %p1, <4 x float>* %p2) nounwind {
; X32-LABEL: fdiv_4f32_mem:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    flds 12(%edx)
; X32-NEXT:    flds 8(%edx)
; X32-NEXT:    flds 4(%edx)
; X32-NEXT:    flds (%edx)
; X32-NEXT:    fdivs (%ecx)
; X32-NEXT:    fxch %st(1)
; X32-NEXT:    fdivs 4(%ecx)
; X32-NEXT:    fxch %st(2)
; X32-NEXT:    fdivs 8(%ecx)
; X32-NEXT:    fxch %st(3)
; X32-NEXT:    fdivs 12(%ecx)
; X32-NEXT:    fstps 12(%eax)
; X32-NEXT:    fxch %st(2)
; X32-NEXT:    fstps 8(%eax)
; X32-NEXT:    fstps 4(%eax)
; X32-NEXT:    fstps (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: fdiv_4f32_mem:
; X64:       # %bb.0:
; X64-NEXT:    flds 12(%rdi)
; X64-NEXT:    flds 8(%rdi)
; X64-NEXT:    flds 4(%rdi)
; X64-NEXT:    flds (%rdi)
; X64-NEXT:    fdivs (%rsi)
; X64-NEXT:    fxch %st(1)
; X64-NEXT:    fdivs 4(%rsi)
; X64-NEXT:    fxch %st(2)
; X64-NEXT:    fdivs 8(%rsi)
; X64-NEXT:    fxch %st(3)
; X64-NEXT:    fdivs 12(%rsi)
; X64-NEXT:    fstps 12(%rdx)
; X64-NEXT:    fxch %st(2)
; X64-NEXT:    fstps 8(%rdx)
; X64-NEXT:    fstps 4(%rdx)
; X64-NEXT:    fstps (%rdx)
; X64-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %p0
  %2 = load <4 x float>, <4 x float>* %p1
  %3 = fdiv <4 x float> %1, %2
  store <4 x float> %3, <4 x float>* %p2
  ret void
}

define void @sitofp_4i64_4f32_mem(<4 x i64>* %p0, <4 x float>* %p1) nounwind {
; X32-LABEL: sitofp_4i64_4f32_mem:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    movl 24(%eax), %ecx
; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
; X32-NEXT:    movl 28(%eax), %ecx
; X32-NEXT:    movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT:    movl 16(%eax), %esi
; X32-NEXT:    movl 20(%eax), %edi
; X32-NEXT:    movl 8(%eax), %ebx
; X32-NEXT:    movl 12(%eax), %edx
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl 4(%eax), %eax
; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X32-NEXT:    movl (%esp), %eax # 4-byte Reload
; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT:    movl 12(%ebp), %eax
; X32-NEXT:    fildll {{[0-9]+}}(%esp)
; X32-NEXT:    fildll {{[0-9]+}}(%esp)
; X32-NEXT:    fildll {{[0-9]+}}(%esp)
; X32-NEXT:    fildll {{[0-9]+}}(%esp)
; X32-NEXT:    fstps 12(%eax)
; X32-NEXT:    fstps 8(%eax)
; X32-NEXT:    fstps 4(%eax)
; X32-NEXT:    fstps (%eax)
; X32-NEXT:    leal -12(%ebp), %esp
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    popl %ebx
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: sitofp_4i64_4f32_mem:
; X64:       # %bb.0:
; X64-NEXT:    movq 24(%rdi), %rax
; X64-NEXT:    movq 16(%rdi), %rcx
; X64-NEXT:    movq (%rdi), %rdx
; X64-NEXT:    movq 8(%rdi), %rdi
; X64-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
; X64-NEXT:    fstps 12(%rsi)
; X64-NEXT:    fstps 8(%rsi)
; X64-NEXT:    fstps 4(%rsi)
; X64-NEXT:    fstps (%rsi)
; X64-NEXT:    retq
  %1 = load <4 x i64>, <4 x i64>* %p0
  %2 = sitofp <4 x i64> %1 to <4 x float>
  store <4 x float> %2, <4 x float>* %p1
  ret void
}

define void @sitofp_4i32_4f32_mem(<4 x i32>* %p0, <4 x float>* %p1) nounwind {
; X32-LABEL: sitofp_4i32_4f32_mem:
; X32:       # %bb.0:
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl 12(%ecx), %edx
; X32-NEXT:    movl 8(%ecx), %esi
; X32-NEXT:    movl (%ecx), %edi
; X32-NEXT:    movl 4(%ecx), %ecx
; X32-NEXT:    movl %edi, (%esp)
; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    fildl (%esp)
; X32-NEXT:    fildl {{[0-9]+}}(%esp)
; X32-NEXT:    fildl {{[0-9]+}}(%esp)
; X32-NEXT:    fildl {{[0-9]+}}(%esp)
; X32-NEXT:    fstps 12(%eax)
; X32-NEXT:    fstps 8(%eax)
; X32-NEXT:    fstps 4(%eax)
; X32-NEXT:    fstps (%eax)
; X32-NEXT:    addl $16, %esp
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    retl
;
; X64-LABEL: sitofp_4i32_4f32_mem:
; X64:       # %bb.0:
; X64-NEXT:    movl 12(%rdi), %eax
; X64-NEXT:    movl 8(%rdi), %ecx
; X64-NEXT:    movl (%rdi), %edx
; X64-NEXT:    movl 4(%rdi), %edi
; X64-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildl -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildl -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildl -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildl -{{[0-9]+}}(%rsp)
; X64-NEXT:    fstps 12(%rsi)
; X64-NEXT:    fstps 8(%rsi)
; X64-NEXT:    fstps 4(%rsi)
; X64-NEXT:    fstps (%rsi)
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %p0
  %2 = sitofp <4 x i32> %1 to <4 x float>
  store <4 x float> %2, <4 x float>* %p1
  ret void
}

define void @add_2i64_mem(<2 x i64>* %p0, <2 x i64>* %p1, <2 x i64>* %p2) nounwind {
; X32-LABEL: add_2i64_mem:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl 12(%edx), %esi
; X32-NEXT:    movl 8(%edx), %edi
; X32-NEXT:    movl (%edx), %ebx
; X32-NEXT:    movl 4(%edx), %edx
; X32-NEXT:    addl (%ecx), %ebx
; X32-NEXT:    adcl 4(%ecx), %edx
; X32-NEXT:    addl 8(%ecx), %edi
; X32-NEXT:    adcl 12(%ecx), %esi
; X32-NEXT:    movl %esi, 12(%eax)
; X32-NEXT:    movl %edi, 8(%eax)
; X32-NEXT:    movl %edx, 4(%eax)
; X32-NEXT:    movl %ebx, (%eax)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: add_2i64_mem:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq 8(%rdi), %rcx
; X64-NEXT:    addq (%rsi), %rax
; X64-NEXT:    addq 8(%rsi), %rcx
; X64-NEXT:    movq %rcx, 8(%rdx)
; X64-NEXT:    movq %rax, (%rdx)
; X64-NEXT:    retq
  %1 = load <2 x i64>, <2 x i64>* %p0
  %2 = load <2 x i64>, <2 x i64>* %p1
  %3 = add <2 x i64> %1, %2
  store <2 x i64> %3, <2 x i64>* %p2
  ret void
}

define void @add_4i32_mem(<4 x i32>* %p0, <4 x i32>* %p1, <4 x i32>* %p2) nounwind {
; X32-LABEL: add_4i32_mem:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl 12(%edx), %esi
; X32-NEXT:    movl 8(%edx), %edi
; X32-NEXT:    movl (%edx), %ebx
; X32-NEXT:    movl 4(%edx), %edx
; X32-NEXT:    addl (%ecx), %ebx
; X32-NEXT:    addl 4(%ecx), %edx
; X32-NEXT:    addl 8(%ecx), %edi
; X32-NEXT:    addl 12(%ecx), %esi
; X32-NEXT:    movl %esi, 12(%eax)
; X32-NEXT:    movl %edi, 8(%eax)
; X32-NEXT:    movl %edx, 4(%eax)
; X32-NEXT:    movl %ebx, (%eax)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: add_4i32_mem:
; X64:       # %bb.0:
; X64-NEXT:    movl 12(%rdi), %eax
; X64-NEXT:    movl 8(%rdi), %ecx
; X64-NEXT:    movl (%rdi), %r8d
; X64-NEXT:    movl 4(%rdi), %edi
; X64-NEXT:    addl (%rsi), %r8d
; X64-NEXT:    addl 4(%rsi), %edi
; X64-NEXT:    addl 8(%rsi), %ecx
; X64-NEXT:    addl 12(%rsi), %eax
; X64-NEXT:    movl %eax, 12(%rdx)
; X64-NEXT:    movl %ecx, 8(%rdx)
; X64-NEXT:    movl %edi, 4(%rdx)
; X64-NEXT:    movl %r8d, (%rdx)
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %p0
  %2 = load <4 x i32>, <4 x i32>* %p1
  %3 = add <4 x i32> %1, %2
  store <4 x i32> %3, <4 x i32>* %p2
  ret void
}