Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X32
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2,-sse | FileCheck %s --check-prefix=X64
      4 
      5 define void @fadd_2f64_mem(<2 x double>* %p0, <2 x double>* %p1, <2 x double>* %p2) nounwind {
        ; Element-wise <2 x double> add: loads the vectors at %p0 and %p1, adds
        ; them, and stores the sum to %p2.
        ;
        ; Expected output for both runs handles one lane at a time on the x87
        ; stack (fldl / faddl / fstpl, with fxch to rotate lanes); no vector
        ; instructions appear. The i686 run first loads the three pointers from
        ; the stack, the x86_64 run addresses through %rdi, %rsi and %rdx.
        ;
        ; The check lines below are autogenerated (see the NOTE at the top of
        ; the file); regenerate them rather than editing by hand.
      6 ; X32-LABEL: fadd_2f64_mem:
      7 ; X32:       # %bb.0:
      8 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
      9 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     10 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
     11 ; X32-NEXT:    fldl 8(%edx)
     12 ; X32-NEXT:    fldl (%edx)
     13 ; X32-NEXT:    faddl (%ecx)
     14 ; X32-NEXT:    fxch %st(1)
     15 ; X32-NEXT:    faddl 8(%ecx)
     16 ; X32-NEXT:    fstpl 8(%eax)
     17 ; X32-NEXT:    fstpl (%eax)
     18 ; X32-NEXT:    retl
     19 ;
     20 ; X64-LABEL: fadd_2f64_mem:
     21 ; X64:       # %bb.0:
     22 ; X64-NEXT:    fldl 8(%rdi)
     23 ; X64-NEXT:    fldl (%rdi)
     24 ; X64-NEXT:    faddl (%rsi)
     25 ; X64-NEXT:    fxch %st(1)
     26 ; X64-NEXT:    faddl 8(%rsi)
     27 ; X64-NEXT:    fstpl 8(%rdx)
     28 ; X64-NEXT:    fstpl (%rdx)
     29 ; X64-NEXT:    retq
        ; IR under test: two vector loads, one fadd, one vector store.
     30   %1 = load <2 x double>, <2 x double>* %p0
     31   %2 = load <2 x double>, <2 x double>* %p1
     32   %3 = fadd <2 x double> %1, %2
     33   store <2 x double> %3, <2 x double>* %p2
     34   ret void
     35 }
     36 
     37 define void @fadd_4f32_mem(<4 x float>* %p0, <4 x float>* %p1, <4 x float>* %p2) nounwind {
        ; Element-wise <4 x float> add: loads the vectors at %p0 and %p1, adds
        ; them, and stores the sum to %p2.
        ;
        ; Expected output for both runs loads all four lanes of the first
        ; operand onto the x87 stack (flds), adds the matching lane of the
        ; second operand from memory (fadds), using fxch to bring each lane to
        ; the top of the stack, and then pops the four results out with fstps.
        ;
        ; The check lines below are autogenerated (see the NOTE at the top of
        ; the file); regenerate them rather than editing by hand.
     38 ; X32-LABEL: fadd_4f32_mem:
     39 ; X32:       # %bb.0:
     40 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     41 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     42 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
     43 ; X32-NEXT:    flds 12(%edx)
     44 ; X32-NEXT:    flds 8(%edx)
     45 ; X32-NEXT:    flds 4(%edx)
     46 ; X32-NEXT:    flds (%edx)
     47 ; X32-NEXT:    fadds (%ecx)
     48 ; X32-NEXT:    fxch %st(1)
     49 ; X32-NEXT:    fadds 4(%ecx)
     50 ; X32-NEXT:    fxch %st(2)
     51 ; X32-NEXT:    fadds 8(%ecx)
     52 ; X32-NEXT:    fxch %st(3)
     53 ; X32-NEXT:    fadds 12(%ecx)
     54 ; X32-NEXT:    fstps 12(%eax)
     55 ; X32-NEXT:    fxch %st(2)
     56 ; X32-NEXT:    fstps 8(%eax)
     57 ; X32-NEXT:    fstps 4(%eax)
     58 ; X32-NEXT:    fstps (%eax)
     59 ; X32-NEXT:    retl
     60 ;
     61 ; X64-LABEL: fadd_4f32_mem:
     62 ; X64:       # %bb.0:
     63 ; X64-NEXT:    flds 12(%rdi)
     64 ; X64-NEXT:    flds 8(%rdi)
     65 ; X64-NEXT:    flds 4(%rdi)
     66 ; X64-NEXT:    flds (%rdi)
     67 ; X64-NEXT:    fadds (%rsi)
     68 ; X64-NEXT:    fxch %st(1)
     69 ; X64-NEXT:    fadds 4(%rsi)
     70 ; X64-NEXT:    fxch %st(2)
     71 ; X64-NEXT:    fadds 8(%rsi)
     72 ; X64-NEXT:    fxch %st(3)
     73 ; X64-NEXT:    fadds 12(%rsi)
     74 ; X64-NEXT:    fstps 12(%rdx)
     75 ; X64-NEXT:    fxch %st(2)
     76 ; X64-NEXT:    fstps 8(%rdx)
     77 ; X64-NEXT:    fstps 4(%rdx)
     78 ; X64-NEXT:    fstps (%rdx)
     79 ; X64-NEXT:    retq
        ; IR under test: two vector loads, one fadd, one vector store.
     80   %1 = load <4 x float>, <4 x float>* %p0
     81   %2 = load <4 x float>, <4 x float>* %p1
     82   %3 = fadd <4 x float> %1, %2
     83   store <4 x float> %3, <4 x float>* %p2
     84   ret void
     85 }
     86 
     87 define void @fdiv_4f32_mem(<4 x float>* %p0, <4 x float>* %p1, <4 x float>* %p2) nounwind {
        ; Element-wise <4 x float> divide: loads the vectors at %p0 (dividend)
        ; and %p1 (divisor), divides, and stores the quotient to %p2.
        ;
        ; Expected output for both runs loads the four dividend lanes onto the
        ; x87 stack (flds), divides each by the matching divisor lane from
        ; memory (fdivs), using fxch to bring each lane to the top of the
        ; stack, and then pops the four results out with fstps.
        ;
        ; The check lines below are autogenerated (see the NOTE at the top of
        ; the file); regenerate them rather than editing by hand.
     88 ; X32-LABEL: fdiv_4f32_mem:
     89 ; X32:       # %bb.0:
     90 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     91 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     92 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
     93 ; X32-NEXT:    flds 12(%edx)
     94 ; X32-NEXT:    flds 8(%edx)
     95 ; X32-NEXT:    flds 4(%edx)
     96 ; X32-NEXT:    flds (%edx)
     97 ; X32-NEXT:    fdivs (%ecx)
     98 ; X32-NEXT:    fxch %st(1)
     99 ; X32-NEXT:    fdivs 4(%ecx)
    100 ; X32-NEXT:    fxch %st(2)
    101 ; X32-NEXT:    fdivs 8(%ecx)
    102 ; X32-NEXT:    fxch %st(3)
    103 ; X32-NEXT:    fdivs 12(%ecx)
    104 ; X32-NEXT:    fstps 12(%eax)
    105 ; X32-NEXT:    fxch %st(2)
    106 ; X32-NEXT:    fstps 8(%eax)
    107 ; X32-NEXT:    fstps 4(%eax)
    108 ; X32-NEXT:    fstps (%eax)
    109 ; X32-NEXT:    retl
    110 ;
    111 ; X64-LABEL: fdiv_4f32_mem:
    112 ; X64:       # %bb.0:
    113 ; X64-NEXT:    flds 12(%rdi)
    114 ; X64-NEXT:    flds 8(%rdi)
    115 ; X64-NEXT:    flds 4(%rdi)
    116 ; X64-NEXT:    flds (%rdi)
    117 ; X64-NEXT:    fdivs (%rsi)
    118 ; X64-NEXT:    fxch %st(1)
    119 ; X64-NEXT:    fdivs 4(%rsi)
    120 ; X64-NEXT:    fxch %st(2)
    121 ; X64-NEXT:    fdivs 8(%rsi)
    122 ; X64-NEXT:    fxch %st(3)
    123 ; X64-NEXT:    fdivs 12(%rsi)
    124 ; X64-NEXT:    fstps 12(%rdx)
    125 ; X64-NEXT:    fxch %st(2)
    126 ; X64-NEXT:    fstps 8(%rdx)
    127 ; X64-NEXT:    fstps 4(%rdx)
    128 ; X64-NEXT:    fstps (%rdx)
    129 ; X64-NEXT:    retq
        ; IR under test: two vector loads, one fdiv, one vector store.
    130   %1 = load <4 x float>, <4 x float>* %p0
    131   %2 = load <4 x float>, <4 x float>* %p1
    132   %3 = fdiv <4 x float> %1, %2
    133   store <4 x float> %3, <4 x float>* %p2
    134   ret void
    135 }
    136 
    137 define void @sitofp_4i64_4f32_mem(<4 x i64>* %p0, <4 x float>* %p1) nounwind {
        ; Signed integer to float conversion of a <4 x i64> vector: loads the
        ; vector at %p0, converts each lane to float, and stores the
        ; <4 x float> result to %p1.
        ;
        ; Expected output converts one lane at a time with x87 fildll (64-bit
        ; integer load) followed by fstps (single-precision store). fildll
        ; reads from stack slots here, so each lane is first copied there:
        ;   - i686 run: sets up an %ebp frame and aligns %esp (andl $-8), then
        ;     copies the eight 32-bit halves through registers into stack
        ;     slots; two of the halves are spilled and reloaded on the way.
        ;   - x86_64 run: copies the four quadwords into slots at negative
        ;     offsets from %rsp without adjusting %rsp.
        ;
        ; The check lines below are autogenerated (see the NOTE at the top of
        ; the file); regenerate them rather than editing by hand.
    138 ; X32-LABEL: sitofp_4i64_4f32_mem:
    139 ; X32:       # %bb.0:
    140 ; X32-NEXT:    pushl %ebp
    141 ; X32-NEXT:    movl %esp, %ebp
    142 ; X32-NEXT:    pushl %ebx
    143 ; X32-NEXT:    pushl %edi
    144 ; X32-NEXT:    pushl %esi
    145 ; X32-NEXT:    andl $-8, %esp
    146 ; X32-NEXT:    subl $48, %esp
    147 ; X32-NEXT:    movl 8(%ebp), %eax
    148 ; X32-NEXT:    movl 24(%eax), %ecx
    149 ; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
    150 ; X32-NEXT:    movl 28(%eax), %ecx
    151 ; X32-NEXT:    movl %ecx, (%esp) # 4-byte Spill
    152 ; X32-NEXT:    movl 16(%eax), %esi
    153 ; X32-NEXT:    movl 20(%eax), %edi
    154 ; X32-NEXT:    movl 8(%eax), %ebx
    155 ; X32-NEXT:    movl 12(%eax), %edx
    156 ; X32-NEXT:    movl (%eax), %ecx
    157 ; X32-NEXT:    movl 4(%eax), %eax
    158 ; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
    159 ; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
    160 ; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
    161 ; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
    162 ; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp)
    163 ; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp)
    164 ; X32-NEXT:    movl (%esp), %eax # 4-byte Reload
    165 ; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
    166 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
    167 ; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
    168 ; X32-NEXT:    movl 12(%ebp), %eax
    169 ; X32-NEXT:    fildll {{[0-9]+}}(%esp)
    170 ; X32-NEXT:    fildll {{[0-9]+}}(%esp)
    171 ; X32-NEXT:    fildll {{[0-9]+}}(%esp)
    172 ; X32-NEXT:    fildll {{[0-9]+}}(%esp)
    173 ; X32-NEXT:    fstps 12(%eax)
    174 ; X32-NEXT:    fstps 8(%eax)
    175 ; X32-NEXT:    fstps 4(%eax)
    176 ; X32-NEXT:    fstps (%eax)
    177 ; X32-NEXT:    leal -12(%ebp), %esp
    178 ; X32-NEXT:    popl %esi
    179 ; X32-NEXT:    popl %edi
    180 ; X32-NEXT:    popl %ebx
    181 ; X32-NEXT:    popl %ebp
    182 ; X32-NEXT:    retl
    183 ;
    184 ; X64-LABEL: sitofp_4i64_4f32_mem:
    185 ; X64:       # %bb.0:
    186 ; X64-NEXT:    movq 24(%rdi), %rax
    187 ; X64-NEXT:    movq 16(%rdi), %rcx
    188 ; X64-NEXT:    movq (%rdi), %rdx
    189 ; X64-NEXT:    movq 8(%rdi), %rdi
    190 ; X64-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
    191 ; X64-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
    192 ; X64-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
    193 ; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
    194 ; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
    195 ; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
    196 ; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
    197 ; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
    198 ; X64-NEXT:    fstps 12(%rsi)
    199 ; X64-NEXT:    fstps 8(%rsi)
    200 ; X64-NEXT:    fstps 4(%rsi)
    201 ; X64-NEXT:    fstps (%rsi)
    202 ; X64-NEXT:    retq
        ; IR under test: one vector load, one sitofp, one vector store.
    203   %1 = load <4 x i64>, <4 x i64>* %p0
    204   %2 = sitofp <4 x i64> %1 to <4 x float>
    205   store <4 x float> %2, <4 x float>* %p1
    206   ret void
    207 }
    208 
    209 define void @sitofp_4i32_4f32_mem(<4 x i32>* %p0, <4 x float>* %p1) nounwind {
        ; Signed integer to float conversion of a <4 x i32> vector: loads the
        ; vector at %p0, converts each lane to float, and stores the
        ; <4 x float> result to %p1.
        ;
        ; Expected output converts one lane at a time with x87 fildl (32-bit
        ; integer load) followed by fstps (single-precision store). Each lane
        ; is first copied through a register into a stack slot for fildl to
        ; read:
        ;   - i686 run: reserves 16 bytes of stack (subl $16) for the slots and
        ;     saves/restores %edi and %esi, which it uses as temporaries.
        ;   - x86_64 run: uses slots at negative offsets from %rsp without
        ;     adjusting %rsp.
        ;
        ; The check lines below are autogenerated (see the NOTE at the top of
        ; the file); regenerate them rather than editing by hand.
    210 ; X32-LABEL: sitofp_4i32_4f32_mem:
    211 ; X32:       # %bb.0:
    212 ; X32-NEXT:    pushl %edi
    213 ; X32-NEXT:    pushl %esi
    214 ; X32-NEXT:    subl $16, %esp
    215 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    216 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    217 ; X32-NEXT:    movl 12(%ecx), %edx
    218 ; X32-NEXT:    movl 8(%ecx), %esi
    219 ; X32-NEXT:    movl (%ecx), %edi
    220 ; X32-NEXT:    movl 4(%ecx), %ecx
    221 ; X32-NEXT:    movl %edi, (%esp)
    222 ; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
    223 ; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp)
    224 ; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
    225 ; X32-NEXT:    fildl (%esp)
    226 ; X32-NEXT:    fildl {{[0-9]+}}(%esp)
    227 ; X32-NEXT:    fildl {{[0-9]+}}(%esp)
    228 ; X32-NEXT:    fildl {{[0-9]+}}(%esp)
    229 ; X32-NEXT:    fstps 12(%eax)
    230 ; X32-NEXT:    fstps 8(%eax)
    231 ; X32-NEXT:    fstps 4(%eax)
    232 ; X32-NEXT:    fstps (%eax)
    233 ; X32-NEXT:    addl $16, %esp
    234 ; X32-NEXT:    popl %esi
    235 ; X32-NEXT:    popl %edi
    236 ; X32-NEXT:    retl
    237 ;
    238 ; X64-LABEL: sitofp_4i32_4f32_mem:
    239 ; X64:       # %bb.0:
    240 ; X64-NEXT:    movl 12(%rdi), %eax
    241 ; X64-NEXT:    movl 8(%rdi), %ecx
    242 ; X64-NEXT:    movl (%rdi), %edx
    243 ; X64-NEXT:    movl 4(%rdi), %edi
    244 ; X64-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
    245 ; X64-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
    246 ; X64-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
    247 ; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
    248 ; X64-NEXT:    fildl -{{[0-9]+}}(%rsp)
    249 ; X64-NEXT:    fildl -{{[0-9]+}}(%rsp)
    250 ; X64-NEXT:    fildl -{{[0-9]+}}(%rsp)
    251 ; X64-NEXT:    fildl -{{[0-9]+}}(%rsp)
    252 ; X64-NEXT:    fstps 12(%rsi)
    253 ; X64-NEXT:    fstps 8(%rsi)
    254 ; X64-NEXT:    fstps 4(%rsi)
    255 ; X64-NEXT:    fstps (%rsi)
    256 ; X64-NEXT:    retq
        ; IR under test: one vector load, one sitofp, one vector store.
    257   %1 = load <4 x i32>, <4 x i32>* %p0
    258   %2 = sitofp <4 x i32> %1 to <4 x float>
    259   store <4 x float> %2, <4 x float>* %p1
    260   ret void
    261 }
    262 
    263 define void @add_2i64_mem(<2 x i64>* %p0, <2 x i64>* %p1, <2 x i64>* %p2) nounwind {
        ; Element-wise <2 x i64> integer add: loads the vectors at %p0 and %p1,
        ; adds them, and stores the sum to %p2.
        ;
        ; Expected output uses only general-purpose registers:
        ;   - i686 run: each 64-bit lane is an addl on the low half followed by
        ;     an adcl (add with carry) on the high half; %ebx, %edi and %esi
        ;     are saved and restored around the body.
        ;   - x86_64 run: one addq per lane.
        ;
        ; The check lines below are autogenerated (see the NOTE at the top of
        ; the file); regenerate them rather than editing by hand.
    264 ; X32-LABEL: add_2i64_mem:
    265 ; X32:       # %bb.0:
    266 ; X32-NEXT:    pushl %ebx
    267 ; X32-NEXT:    pushl %edi
    268 ; X32-NEXT:    pushl %esi
    269 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    270 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    271 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
    272 ; X32-NEXT:    movl 12(%edx), %esi
    273 ; X32-NEXT:    movl 8(%edx), %edi
    274 ; X32-NEXT:    movl (%edx), %ebx
    275 ; X32-NEXT:    movl 4(%edx), %edx
    276 ; X32-NEXT:    addl (%ecx), %ebx
    277 ; X32-NEXT:    adcl 4(%ecx), %edx
    278 ; X32-NEXT:    addl 8(%ecx), %edi
    279 ; X32-NEXT:    adcl 12(%ecx), %esi
    280 ; X32-NEXT:    movl %esi, 12(%eax)
    281 ; X32-NEXT:    movl %edi, 8(%eax)
    282 ; X32-NEXT:    movl %edx, 4(%eax)
    283 ; X32-NEXT:    movl %ebx, (%eax)
    284 ; X32-NEXT:    popl %esi
    285 ; X32-NEXT:    popl %edi
    286 ; X32-NEXT:    popl %ebx
    287 ; X32-NEXT:    retl
    288 ;
    289 ; X64-LABEL: add_2i64_mem:
    290 ; X64:       # %bb.0:
    291 ; X64-NEXT:    movq (%rdi), %rax
    292 ; X64-NEXT:    movq 8(%rdi), %rcx
    293 ; X64-NEXT:    addq (%rsi), %rax
    294 ; X64-NEXT:    addq 8(%rsi), %rcx
    295 ; X64-NEXT:    movq %rcx, 8(%rdx)
    296 ; X64-NEXT:    movq %rax, (%rdx)
    297 ; X64-NEXT:    retq
        ; IR under test: two vector loads, one add, one vector store.
    298   %1 = load <2 x i64>, <2 x i64>* %p0
    299   %2 = load <2 x i64>, <2 x i64>* %p1
    300   %3 = add <2 x i64> %1, %2
    301   store <2 x i64> %3, <2 x i64>* %p2
    302   ret void
    303 }
    304 
    305 define void @add_4i32_mem(<4 x i32>* %p0, <4 x i32>* %p1, <4 x i32>* %p2) nounwind {
        ; Element-wise <4 x i32> integer add: loads the vectors at %p0 and %p1,
        ; adds them, and stores the sum to %p2.
        ;
        ; Expected output uses only general-purpose registers, with one
        ; independent addl per lane (no carry between lanes):
        ;   - i686 run: %ebx, %edi and %esi are saved and restored around the
        ;     body because they hold lanes.
        ;   - x86_64 run: lanes live in %eax, %ecx, %edi and %r8d, with no
        ;     pushes or pops.
        ;
        ; The check lines below are autogenerated (see the NOTE at the top of
        ; the file); regenerate them rather than editing by hand.
    306 ; X32-LABEL: add_4i32_mem:
    307 ; X32:       # %bb.0:
    308 ; X32-NEXT:    pushl %ebx
    309 ; X32-NEXT:    pushl %edi
    310 ; X32-NEXT:    pushl %esi
    311 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    312 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    313 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
    314 ; X32-NEXT:    movl 12(%edx), %esi
    315 ; X32-NEXT:    movl 8(%edx), %edi
    316 ; X32-NEXT:    movl (%edx), %ebx
    317 ; X32-NEXT:    movl 4(%edx), %edx
    318 ; X32-NEXT:    addl (%ecx), %ebx
    319 ; X32-NEXT:    addl 4(%ecx), %edx
    320 ; X32-NEXT:    addl 8(%ecx), %edi
    321 ; X32-NEXT:    addl 12(%ecx), %esi
    322 ; X32-NEXT:    movl %esi, 12(%eax)
    323 ; X32-NEXT:    movl %edi, 8(%eax)
    324 ; X32-NEXT:    movl %edx, 4(%eax)
    325 ; X32-NEXT:    movl %ebx, (%eax)
    326 ; X32-NEXT:    popl %esi
    327 ; X32-NEXT:    popl %edi
    328 ; X32-NEXT:    popl %ebx
    329 ; X32-NEXT:    retl
    330 ;
    331 ; X64-LABEL: add_4i32_mem:
    332 ; X64:       # %bb.0:
    333 ; X64-NEXT:    movl 12(%rdi), %eax
    334 ; X64-NEXT:    movl 8(%rdi), %ecx
    335 ; X64-NEXT:    movl (%rdi), %r8d
    336 ; X64-NEXT:    movl 4(%rdi), %edi
    337 ; X64-NEXT:    addl (%rsi), %r8d
    338 ; X64-NEXT:    addl 4(%rsi), %edi
    339 ; X64-NEXT:    addl 8(%rsi), %ecx
    340 ; X64-NEXT:    addl 12(%rsi), %eax
    341 ; X64-NEXT:    movl %eax, 12(%rdx)
    342 ; X64-NEXT:    movl %ecx, 8(%rdx)
    343 ; X64-NEXT:    movl %edi, 4(%rdx)
    344 ; X64-NEXT:    movl %r8d, (%rdx)
    345 ; X64-NEXT:    retq
        ; IR under test: two vector loads, one add, one vector store.
    346   %1 = load <4 x i32>, <4 x i32>* %p0
    347   %2 = load <4 x i32>, <4 x i32>* %p1
    348   %3 = add <4 x i32> %1, %2
    349   store <4 x i32> %3, <4 x i32>* %p2
    350   ret void
    351 }
    352