; RUN: llc -verify-machineinstrs -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard < %s | FileCheck %s

; The four test_vmovs_via_vext_* functions insert a scalar float argument into
; one lane of a <2 x float> and expect the lane move to be emitted as a pair of
; vext.32 instructions on d-registers. The name suffix gives the source and
; destination lanes (NOTE: the d0/d1 assignment in the checks presumably
; follows from the hard-float calling convention; confirm against the ABI).

; Scalar in lane 0 of d0 goes to lane 0 of the vector.
define <2 x float> @test_vmovs_via_vext_lane0to0(float %arg, <2 x float> %in) {
; CHECK-LABEL: test_vmovs_via_vext_lane0to0:
  %vec = insertelement <2 x float> %in, float %arg, i32 0
  %res = fadd <2 x float> %vec, %vec

; CHECK: vext.32 d1, d1, d0, #1
; CHECK: vext.32 d1, d1, d1, #1
; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1

  ret <2 x float> %res
}

; Scalar in lane 0 of d0 goes to lane 1 of the vector.
define <2 x float> @test_vmovs_via_vext_lane0to1(float %arg, <2 x float> %in) {
; CHECK-LABEL: test_vmovs_via_vext_lane0to1:
  %vec = insertelement <2 x float> %in, float %arg, i32 1
  %res = fadd <2 x float> %vec, %vec

; CHECK: vext.32 d1, d1, d1, #1
; CHECK: vext.32 d1, d1, d0, #1
; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1

  ret <2 x float> %res
}

; The unnamed leading float parameter pushes %arg into the second float
; argument slot, so the scalar starts in lane 1 and goes to lane 0.
define <2 x float> @test_vmovs_via_vext_lane1to0(float, float %arg, <2 x float> %in) {
; CHECK-LABEL: test_vmovs_via_vext_lane1to0:
  %vec = insertelement <2 x float> %in, float %arg, i32 0
  %res = fadd <2 x float> %vec, %vec

; CHECK: vext.32 d1, d1, d1, #1
; CHECK: vext.32 d1, d0, d1, #1
; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1

  ret <2 x float> %res
}

; As above, but the scalar in the second float argument slot goes to lane 1.
define <2 x float> @test_vmovs_via_vext_lane1to1(float, float %arg, <2 x float> %in) {
; CHECK-LABEL: test_vmovs_via_vext_lane1to1:
  %vec = insertelement <2 x float> %in, float %arg, i32 1
  %res = fadd <2 x float> %vec, %vec

; CHECK: vext.32 d1, d0, d1, #1
; CHECK: vext.32 d1, d1, d1, #1
; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1

  ret <2 x float> %res
}


; A scalar move whose destination is lane 0 is expected to become a vdup.32.
define float @test_vmovs_via_vdup(float, float %ret, float %lhs, float %rhs) {
; CHECK-LABEL: test_vmovs_via_vdup:

; Do an operation (which will end up NEON because of +neonfp) to convince the
; execution-domain pass that NEON is a good thing to use.
  %res = fadd float %ret, %ret
; It makes sense for LLVM to do the addition in d0 here, because it's going
; to be returned. This means it will want a "vmov s0, s1":
; CHECK: vdup.32 d0, d0[1]

  ret float %res
}

declare float @llvm.sqrt.f32(float)

declare void @bar()

; This is primarily a compile test: the register move created around the call
; to @bar must not be converted to a d-register instruction.
define float @test_ineligible(float, float %in) {
; CHECK-LABEL: test_ineligible:

  %sqrt = call float @llvm.sqrt.f32(float %in)
  %val = fadd float %sqrt, %sqrt

; This call forces a move from a callee-saved register to the return-reg. That
; move is not eligible for conversion to a d-register instruction because the
; use-def chains would be messed up. Primarily a compile-test (we used to
; hit an internal fault).
  call void @bar()
; CHECK: bl bar
; CHECK: vext.32
; CHECK: vext.32
  ret float %val
}

; Bitcasts between i32 and float around an fadd: both the GPR-to-FP and the
; FP-to-GPR transfers are expected to use d-register vmov.32 forms.
define i32 @test_vmovs_no_sreg(i32 %in) {
; CHECK-LABEL: test_vmovs_no_sreg:

; Check that the movement to and from GPRs takes place in the NEON domain.
; CHECK: vmov.32 d
  %x = bitcast i32 %in to float

  %res = fadd float %x, %x

; CHECK: vmov.32 r{{[0-9]+}}, d
  %resi = bitcast float %res to i32

  ret i32 %resi
}


; The point of this test is:
;   + Make sure s1 is live before the BL
;   + Make sure s1 is clobbered by the BL
;   + Convince LLVM to emit a VMOV to S0
;   + Convince LLVM to domain-convert this.

; When all of those are satisfied, LLVM should *not* mark s1 as an implicit-use
; because it's dead.
;
; No instruction patterns are checked below; beyond the label, the function
; only requires llc to complete successfully under -verify-machineinstrs.

declare float @clobbers_s1(float, float)

define <2 x float> @test_clobbers_recognised(<2 x float> %invec, float %val) {
; CHECK-LABEL: test_clobbers_recognised:
  %elt = call float @clobbers_s1(float %val, float %val)

  %vec = insertelement <2 x float> %invec, float %elt, i32 0
  %res = fadd <2 x float> %vec, %vec
  ret <2 x float> %res
}