; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefixes=CHECK,X64

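; Each double argument is bitcast to x86_mmx and fed through a chain of MMX
; padd.d/padd.w/pmulu.dq intrinsics. Two operands are the constant
; bitcast (double 0.0 to x86_mmx); the checks below show that zero being
; materialized with pxor for the pmuludq use and loaded from the constant
; pool (.LCPI label) for the paddw use.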
define double @mmx_zero(double, double, double, double) nounwind {
; X86-LABEL: mmx_zero:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movq 8(%ebp), %mm0
; X86-NEXT:    movq 16(%ebp), %mm5
; X86-NEXT:    movq %mm5, (%esp) # 8-byte Spill
; X86-NEXT:    movq %mm0, %mm3
; X86-NEXT:    paddd %mm5, %mm3
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    movq %mm3, %mm6
; X86-NEXT:    pmuludq %mm1, %mm6
; X86-NEXT:    movq 24(%ebp), %mm4
; X86-NEXT:    movq %mm6, %mm2
; X86-NEXT:    paddd %mm4, %mm2
; X86-NEXT:    paddw %mm2, %mm0
; X86-NEXT:    movq %mm5, %mm1
; X86-NEXT:    paddw %mm0, %mm1
; X86-NEXT:    movq 32(%ebp), %mm5
; X86-NEXT:    movq %mm1, %mm7
; X86-NEXT:    pmuludq %mm5, %mm7
; X86-NEXT:    paddw %mm4, %mm7
; X86-NEXT:    paddw %mm7, %mm5
; X86-NEXT:    paddw %mm5, %mm2
; X86-NEXT:    paddw %mm2, %mm0
; X86-NEXT:    paddw %mm6, %mm0
; X86-NEXT:    pmuludq %mm3, %mm0
; X86-NEXT:    paddw {{\.LCPI.*}}, %mm0
; X86-NEXT:    paddw %mm1, %mm0
; X86-NEXT:    pmuludq %mm7, %mm0
; X86-NEXT:    pmuludq (%esp), %mm0 # 8-byte Folded Reload
; X86-NEXT:    paddw %mm5, %mm0
; X86-NEXT:    paddw %mm2, %mm0
; X86-NEXT:    movq2dq %mm0, %xmm0
; X86-NEXT:    movsd %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: mmx_zero:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    movdq2q %xmm1, %mm5
; X64-NEXT:    movq %mm5, -{{[0-9]+}}(%rsp) # 8-byte Spill
; X64-NEXT:    movq %mm0, %mm3
; X64-NEXT:    paddd %mm5, %mm3
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    movq %mm3, %mm6
; X64-NEXT:    pmuludq %mm1, %mm6
; X64-NEXT:    movdq2q %xmm2, %mm4
; X64-NEXT:    movq %mm6, %mm2
; X64-NEXT:    paddd %mm4, %mm2
; X64-NEXT:    paddw %mm2, %mm0
; X64-NEXT:    movq %mm5, %mm1
; X64-NEXT:    paddw %mm0, %mm1
; X64-NEXT:    movdq2q %xmm3, %mm5
; X64-NEXT:    movq %mm1, %mm7
; X64-NEXT:    pmuludq %mm5, %mm7
; X64-NEXT:    paddw %mm4, %mm7
; X64-NEXT:    paddw %mm7, %mm5
; X64-NEXT:    paddw %mm5, %mm2
; X64-NEXT:    paddw %mm2, %mm0
; X64-NEXT:    paddw %mm6, %mm0
; X64-NEXT:    pmuludq %mm3, %mm0
; X64-NEXT:    paddw {{\.LCPI.*}}, %mm0
; X64-NEXT:    paddw %mm1, %mm0
; X64-NEXT:    pmuludq %mm7, %mm0
; X64-NEXT:    pmuludq -{{[0-9]+}}(%rsp), %mm0 # 8-byte Folded Reload
; X64-NEXT:    paddw %mm5, %mm0
; X64-NEXT:    paddw %mm2, %mm0
; X64-NEXT:    movq2dq %mm0, %xmm0
; X64-NEXT:    retq
  %5 = bitcast double %0 to x86_mmx
  %6 = bitcast double %1 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %6)
  %8 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %7, x86_mmx bitcast (double 0.000000e+00 to x86_mmx))
  %9 = bitcast double %2 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %8, x86_mmx %9)
  %11 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %5, x86_mmx %10)
  %12 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %6, x86_mmx %11)
  %13 = bitcast double %3 to x86_mmx
  %14 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %12, x86_mmx %13)
  %15 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %14, x86_mmx %9)
  %16 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %15, x86_mmx %13)
  %17 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %16, x86_mmx %10)
  %18 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %17, x86_mmx %11)
  %19 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %18, x86_mmx %8)
  %20 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %19, x86_mmx %7)
  %21 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %20, x86_mmx bitcast (double 0.000000e+00 to x86_mmx))
  %22 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %21, x86_mmx %12)
  %23 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %22, x86_mmx %15)
  %24 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %23, x86_mmx %6)
  %25 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %24, x86_mmx %16)
  %26 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %25, x86_mmx %17)
  %27 = bitcast x86_mmx %26 to double
  ret double %27
}

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx)