; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=W64
; RUN: llc < %s -mcpu=yonah -march=x86 -mtriple=i386-linux-gnu -asm-verbose=0 -o - | FileCheck %s -check-prefix=X32

; Code generation checks for <2 x float> values. Each function below is
; compiled three times (x86_64-linux, x86_64-win32 and i386-linux-gnu, per the
; run lines above) and the emitted instructions are matched against the
; per-target check blocks placed at the end of the function body.
;
; In every Win64 check block the vector arguments are expected to be read from
; memory through the pointer registers (%rcx, %rdx), while the other two
; targets expect them directly in %xmm0 / %xmm1.

; PR7518
; test1: add lane 0 and lane 1 of %Q as scalars and store the result to %P2.
define void @test1(<2 x float> %Q, float *%P2) nounwind {
  %lo = extractelement <2 x float> %Q, i32 0
  %hi = extractelement <2 x float> %Q, i32 1
  %sum = fadd float %lo, %hi

  store float %sum, float* %P2
  ret void
; X64: test1:
; X64-NEXT: pshufd $1, %xmm0, %xmm1
; X64-NEXT: addss %xmm0, %xmm1
; X64-NEXT: movss %xmm1, (%rdi)
; X64-NEXT: ret

; W64: test1:
; W64-NEXT: movdqa (%rcx), %xmm0
; W64-NEXT: pshufd $1, %xmm0, %xmm1
; W64-NEXT: addss %xmm0, %xmm1
; W64-NEXT: movss %xmm1, (%rdx)
; W64-NEXT: ret

; X32: test1:
; X32-NEXT: pshufd $1, %xmm0, %xmm1
; X32-NEXT: addss %xmm0, %xmm1
; X32-NEXT: movl 4(%esp), %eax
; X32-NEXT: movss %xmm1, (%eax)
; X32-NEXT: ret
}


; test2: element-wise add of two <2 x float> arguments. The pointer argument
; %P is not used by the body.
define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounwind {
  %sum = fadd <2 x float> %Q, %R
  ret <2 x float> %sum

; X64: test2:
; X64-NEXT: addps %xmm1, %xmm0
; X64-NEXT: ret

; W64: test2:
; W64-NEXT: movaps (%rcx), %xmm0
; W64-NEXT: addps (%rdx), %xmm0
; W64-NEXT: ret

; X32: test2:
; X32: addps %xmm1, %xmm0
}


; test3: take the low two lanes of a <4 x float> and add that <2 x float> to
; itself.
define <2 x float> @test3(<4 x float> %A) nounwind {
  %low2 = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %dbl = fadd <2 x float> %low2, %low2
  ret <2 x float> %dbl
; X64: test3:
; X64-NEXT: addps %xmm0, %xmm0
; X64-NEXT: ret

; W64: test3:
; W64-NEXT: movaps (%rcx), %xmm0
; W64-NEXT: addps %xmm0, %xmm0
; W64-NEXT: ret

; X32: test3:
; X32-NEXT: addps %xmm0, %xmm0
; X32-NEXT: ret
}

; test4: add a <2 x float> argument to itself.
define <2 x float> @test4(<2 x float> %A) nounwind {
  %dbl = fadd <2 x float> %A, %A
  ret <2 x float> %dbl
; X64: test4:
; X64-NEXT: addps %xmm0, %xmm0
; X64-NEXT: ret

; W64: test4:
; W64-NEXT: movaps (%rcx), %xmm0
; W64-NEXT: addps %xmm0, %xmm0
; W64-NEXT: ret

; X32: test4:
; X32-NEXT: addps %xmm0, %xmm0
; X32-NEXT: ret
}

; test5: narrow a <4 x float> to its low two lanes, double it, then double it
; again in a second basic block and widen back to <4 x float> (upper lanes
; undef). The <2 x float> value %dbl is live across the branch.
define <4 x float> @test5(<4 x float> %A) nounwind {
  %low2 = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %dbl = fadd <2 x float> %low2, %low2
  br label %next

next:
  %quad = fadd <2 x float> %dbl, %dbl
  %wide = shufflevector <2 x float> %quad, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x float> %wide

; X64: test5:
; X64-NEXT: addps %xmm0, %xmm0
; X64-NEXT: addps %xmm0, %xmm0
; X64-NEXT: ret

; W64: test5:
; W64-NEXT: movaps (%rcx), %xmm0
; W64-NEXT: addps %xmm0, %xmm0
; W64-NEXT: addps %xmm0, %xmm0
; W64-NEXT: ret

; X32: test5:
; X32-NEXT: addps %xmm0, %xmm0
; X32-NEXT: addps %xmm0, %xmm0
; X32-NEXT: ret
}