Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse4.1 | FileCheck %s
      2 
      3 define void @t1(float* %R, <4 x float>* %P1) nounwind {
        ; Test t1: load a <4 x float> from %P1, extract lane 3, store that float to %R.
        ; The expected assembly below never loads the whole vector: it reads both
        ; pointer arguments from the stack and then does one scalar movss from byte
        ; offset 12 of %P1 (lane 3 * 4 bytes per float), followed by a scalar store.
      4 ; CHECK-LABEL: t1:
      5 ; CHECK:       # BB#0:
      6 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
      7 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
      8 ; CHECK-NEXT:    movss 12(%ecx), %xmm0
      9 ; CHECK-NEXT:    movss %xmm0, (%eax)
     10 ; CHECK-NEXT:    retl
     11 
        ; IR under test: full-vector load, lane-3 extract, scalar store.
     12 	%X = load <4 x float>, <4 x float>* %P1
     13 	%tmp = extractelement <4 x float> %X, i32 3
     14 	store float %tmp, float* %R
     15 	ret void
     16 }
     17 
     18 define float @t2(<4 x float>* %P1) nounwind {
        ; Test t2: load a <4 x float> from %P1 and return lane 2 as a float.
        ; Unlike the store-based tests in this file, the expected assembly keeps the
        ; full 16-byte vector load (movapd) and then uses shufpd with pattern [1,0],
        ; which swaps the two 64-bit halves so the original float lane 2 ends up in
        ; lane 0 of xmm0.
        ; The value is then spilled to the stack slot created by the pushl and
        ; reloaded with flds, i.e. it is handed back on the x87 stack
        ; (presumably because the 32-bit x86 calling convention returns float in
        ; st(0) -- confirm against the target ABI).
     19 ; CHECK-LABEL: t2:
     20 ; CHECK:       # BB#0:
     21 ; CHECK-NEXT:    pushl %eax
     22 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
     23 ; CHECK-NEXT:    movapd (%eax), %xmm0
     24 ; CHECK-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
     25 ; CHECK-NEXT:    movss %xmm0, (%esp)
     26 ; CHECK-NEXT:    flds (%esp)
     27 ; CHECK-NEXT:    popl %eax
     28 ; CHECK-NEXT:    retl
     29 
        ; IR under test: full-vector load, lane-2 extract, value returned directly.
     30 	%X = load <4 x float>, <4 x float>* %P1
     31 	%tmp = extractelement <4 x float> %X, i32 2
     32 	ret float %tmp
     33 }
     34 
     35 define void @t3(i32* %R, <4 x i32>* %P1) nounwind {
        ; Test t3: integer counterpart of t1. Load a <4 x i32> from %P1, extract
        ; lane 3, and store that i32 to %R.
        ; The expected assembly uses only general-purpose registers: a 32-bit movl
        ; from byte offset 12 of %P1 (lane 3 * 4 bytes per i32) into %ecx, then a
        ; movl of %ecx to the address held in %eax. No vector register is involved.
     36 ; CHECK-LABEL: t3:
     37 ; CHECK:       # BB#0:
     38 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
     39 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     40 ; CHECK-NEXT:    movl 12(%ecx), %ecx
     41 ; CHECK-NEXT:    movl %ecx, (%eax)
     42 ; CHECK-NEXT:    retl
     43 
        ; IR under test: full-vector load, lane-3 extract, scalar store.
     44 	%X = load <4 x i32>, <4 x i32>* %P1
     45 	%tmp = extractelement <4 x i32> %X, i32 3
     46 	store i32 %tmp, i32* %R
     47 	ret void
     48 }
     49 
     50 define i32 @t4(<4 x i32>* %P1) nounwind {
        ; Test t4: load a <4 x i32> from %P1 and return lane 3 as an i32.
        ; The expected assembly reads the pointer argument from the stack into %eax
        ; and then overwrites %eax with a single 32-bit load from byte offset 12
        ; (lane 3 * 4 bytes per i32), so the result is returned in %eax without
        ; touching any vector register.
     51 ; CHECK-LABEL: t4:
     52 ; CHECK:       # BB#0:
     53 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
     54 ; CHECK-NEXT:    movl 12(%eax), %eax
     55 ; CHECK-NEXT:    retl
     56 
        ; IR under test: full-vector load, lane-3 extract, value returned directly.
     57 	%X = load <4 x i32>, <4 x i32>* %P1
     58 	%tmp = extractelement <4 x i32> %X, i32 3
     59 	ret i32 %tmp
     60 }
     61