Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc -verify-machineinstrs -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard < %s | FileCheck %s
      2 
      3 define <2 x float> @test_vmovs_via_vext_lane0to0(float %arg, <2 x float> %in) {
      4 ; CHECK-LABEL: test_vmovs_via_vext_lane0to0:
        ; Overwrites lane 0 of %in with the scalar %arg and then uses the vector in
        ; a vector fadd. The checks below expect the lane insert to be emitted as a
        ; pair of vext.32 instructions on d0/d1, followed by a vadd.f32 on d1.
        ; NOTE(review): presumably %arg arrives in s0 (lane 0 of d0) and %in in d1
        ; under -float-abi=hard, which is what "lane0to0" refers to -- confirm.
      5   %vec = insertelement <2 x float> %in, float %arg, i32 0
      6   %res = fadd <2 x float> %vec, %vec
      7 
      8 ; CHECK: vext.32 d1, d1, d0, #1
      9 ; CHECK: vext.32 d1, d1, d1, #1
     10 ; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
     11 
     12   ret <2 x float> %res
     13 }
     14 
     15 define <2 x float> @test_vmovs_via_vext_lane0to1(float %arg, <2 x float> %in) {
     16 ; CHECK-LABEL: test_vmovs_via_vext_lane0to1:
        ; Same shape as the lane0to0 test, but %arg is inserted into lane 1 of %in.
        ; The checks below expect the two vext.32 instructions in the opposite
        ; order (the d1,d1,d1 form first, then the one that reads d0).
        ; NOTE(review): presumably %arg is in s0 (lane 0 of d0) and %in in d1 under
        ; -float-abi=hard -- confirm.
     17   %vec = insertelement <2 x float> %in, float %arg, i32 1
     18   %res = fadd <2 x float> %vec, %vec
     19 
     20 ; CHECK: vext.32 d1, d1, d1, #1
     21 ; CHECK: vext.32 d1, d1, d0, #1
     22 ; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
     23 
     24   ret <2 x float> %res
     25 }
     26 
     27 define <2 x float> @test_vmovs_via_vext_lane1to0(float, float %arg, <2 x float> %in) {
     28 ; CHECK-LABEL: test_vmovs_via_vext_lane1to0:
        ; The unnamed leading float parameter makes %arg the second float argument;
        ; %arg is then inserted into lane 0 of %in. The checks below expect two
        ; vext.32 instructions, the second taking d0 as its first source operand.
        ; NOTE(review): presumably the padding parameter places %arg in s1 (lane 1
        ; of d0), which is what "lane1to0" refers to -- confirm.
     29   %vec = insertelement <2 x float> %in, float %arg, i32 0
     30   %res = fadd <2 x float> %vec, %vec
     31 
     32 ; CHECK: vext.32 d1, d1, d1, #1
     33 ; CHECK: vext.32 d1, d0, d1, #1
     34 ; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
     35 
     36   ret <2 x float> %res
     37 }
     38 
     39 define <2 x float> @test_vmovs_via_vext_lane1to1(float, float %arg, <2 x float> %in) {
     40 ; CHECK-LABEL: test_vmovs_via_vext_lane1to1:
        ; The unnamed leading float parameter makes %arg the second float argument;
        ; %arg is then inserted into lane 1 of %in. The checks below expect the
        ; vext.32 that reads d0 first, followed by the d1,d1,d1 form.
        ; NOTE(review): presumably the padding parameter places %arg in s1 (lane 1
        ; of d0), which is what "lane1to1" refers to -- confirm.
     41   %vec = insertelement <2 x float> %in, float %arg, i32 1
     42   %res = fadd <2 x float> %vec, %vec
     43 
     44 ; CHECK: vext.32 d1, d0, d1, #1
     45 ; CHECK: vext.32 d1, d1, d1, #1
     46 ; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
     47 
     48   ret <2 x float> %res
     49 }
     50 
     51 
     52 define float @test_vmovs_via_vdup(float, float %ret, float %lhs, float %rhs) {
     53 ; CHECK-LABEL: test_vmovs_via_vdup:
        ; Scalar case: the first float parameter is unnamed padding, %ret is the
        ; second one, and %lhs/%rhs are not used by the body. The check below
        ; expects the scalar move to be emitted as a vdup.32 from lane 1 of d0.
     54 
     55   ; Do an operation (which will end up in NEON because of +neonfp) to convince
     56   ; the execution-domain pass that NEON is a good thing to use.
     57   %res = fadd float %ret, %ret
     58   ;  It makes sense for LLVM to do the addition in d0 here, because the result
     59   ;  is going to be returned. This means it will want a "vmov s0, s1":
     60 ; CHECK: vdup.32 d0, d0[1]
     61 
     62   ret float %res
     63 }
     64 
     65 declare float @llvm.sqrt.f32(float)
        ; ^ Square-root intrinsic; called by @test_ineligible.
     66 
     67 declare void @bar()
        ; ^ External function with no body in this file; @test_ineligible calls it
        ;   between computing its result and returning it.
     68 
     69 ; This is primarily a compile test; see the comment above the call to @bar.
     70 define float @test_ineligible(float, float %in) {
     71 ; CHECK-LABEL: test_ineligible:
        ; The first float parameter is unnamed padding; %in is the second one.
     72 
     73   %sqrt = call float @llvm.sqrt.f32(float %in)
     74   %val = fadd float %sqrt, %sqrt
     75 
     76   ; This call forces a move from a callee-saved register to the return-reg. That
     77   ; move is not eligible for conversion to a d-register instruction because the
     78   ; use-def chains would be messed up. Primarily a compile-test (this used to
     79   ; cause an internal fault).
     80   call void @bar()
     81 ; CHECK: bl bar
     82 ; CHECK: vext.32
     83 ; CHECK: vext.32
     84   ret float %val
     85 }
     86 
     87 define i32 @test_vmovs_no_sreg(i32 %in) {
     88 ; CHECK-LABEL: test_vmovs_no_sreg:
        ; Round-trips an integer through a float add: the i32 argument is bitcast
        ; to float, added to itself, and the result is bitcast back to i32. The
        ; checks below expect both transfers to be vmov.32 instructions that name
        ; a d-register.
     89 
     90   ; Check that the movement to and from GPRs takes place in the NEON domain.
     91 ; CHECK: vmov.32 d
     92   %x = bitcast i32 %in to float
     93 
     94   %res = fadd float %x, %x
     95 
     96 ; CHECK: vmov.32 r{{[0-9]+}}, d
     97   %resi = bitcast float %res to i32
     98 
     99   ret i32 %resi
    100 }
    101 
    102 
    103 ; The point of this test is:
    104 ;   + Make sure s1 is live before the BL
    105 ;   + Make sure s1 is clobbered by the BL
    106 ;   + Convince LLVM to emit a VMOV to S0
    107 ;   + Convince LLVM to domain-convert this.
    108 
    109 ; When all of those are satisfied, LLVM should *not* mark s1 as an implicit-use
    110 ; because it's dead.
    111 
    112 declare float @clobbers_s1(float, float)
    113 
    114 define <2 x float> @test_clobbers_recognised(<2 x float> %invec, float %val) {
        ; Label check: confirms this function is emitted and stops the checks of
        ; the preceding function from matching text that belongs to this one.
        ; CHECK-LABEL: test_clobbers_recognised:
        ;
        ; %val is passed as both float arguments of the call, and the returned
        ; scalar is then inserted into lane 0 of %invec and used in a vector add.
        ; NOTE(review): beyond the call itself no instruction sequence is pinned
        ; here; presumably the -verify-machineinstrs flag on the RUN line is what
        ; catches a wrongly added implicit use of a dead register -- confirm.
    115   %elt = call float @clobbers_s1(float %val, float %val)
        ; CHECK: bl clobbers_s1
    116 
    117   %vec = insertelement <2 x float> %invec, float %elt, i32 0
    118   %res = fadd <2 x float> %vec, %vec
    119   ret <2 x float> %res
    120 }
    121