Home | History | Annotate | Download | only in X86
      1 //===---------------------------------------------------------------------===//
      2 // Random ideas for the X86 backend: MMX-specific stuff.
      3 //===---------------------------------------------------------------------===//
      4 
      5 //===---------------------------------------------------------------------===//
      6 
      7 This:
      8 
      9 #include <mmintrin.h>
     10 
     11 __v2si qux(int A) {
     12   return (__v2si){ 0, A };
     13 }
     14 
     15 is compiled into:
     16 
     17 _qux:
     18         subl $28, %esp
     19         movl 32(%esp), %eax
     20         movd %eax, %mm0
     21         movq %mm0, (%esp)
     22         movl (%esp), %eax
     23         movl %eax, 20(%esp)
     24         movq %mm0, 8(%esp)
     25         movl 12(%esp), %eax
     26         movl %eax, 16(%esp)
     27         movq 16(%esp), %mm0
     28         addl $28, %esp
     29         ret
     30 
     31 Yuck!
     32 
     33 GCC gives us:
     34 
     35 _qux:
     36         subl    $12, %esp
     37         movl    16(%esp), %eax
     38         movl    20(%esp), %edx
     39         movl    $0, (%eax)
     40         movl    %edx, 4(%eax)
     41         addl    $12, %esp
     42         ret     $4
     43 
     44 //===---------------------------------------------------------------------===//
     45 
     46 We generate crappy code for this:
     47 
     48 __m64 t() {
     49   return _mm_cvtsi32_si64(1);
     50 }
     51 
     52 _t:
     53 	subl	$12, %esp
     54 	movl	$1, %eax
     55 	movd	%eax, %mm0
     56 	movq	%mm0, (%esp)
     57 	movl	(%esp), %eax
     58 	movl	4(%esp), %edx
     59 	addl	$12, %esp
     60 	ret
     61 
      62 The extra stack traffic is covered in the previous entry. But the other reason
      63 is that we are not smart about materializing constants in MMX registers. With -m64 we currently generate:
     64 
     65 	movl	$1, %eax
     66 	movd	%eax, %mm0
     67 	movd	%mm0, %rax
     68 	ret
     69 
      70 We should be using a constant pool load instead:
     71 	movq	LC0(%rip), %rax
     72