Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs -mtriple=amdgcn-- -o - %s | FileCheck %s
      2 
      3 declare float @llvm.fma.f32(float, float, float)
      4 
      5 ; This checks that the rematerialization support of the coalescer does not
      6 ; unnecessarily widen the register class. Without those fixes, more than
      7 ; 20 VGPRs are used here.
      8 ; Also check that some rematerialization of the 0 constant happened.
      9 ; CHECK-LABEL: foobar
     10 ; CHECK:  v_mov_b32_e32 v{{[0-9]+}}, 0
     11 ; CHECK:  v_mov_b32_e32 v{{[0-9]+}}, 0
     12 ; CHECK:  v_mov_b32_e32 v{{[0-9]+}}, 0
     13 ; CHECK:  v_mov_b32_e32 v{{[0-9]+}}, 0
     14 ; It's probably OK if this is slightly higher:
     15 ; CHECK: ; NumVgprs: 9
     16 define void @foobar(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %flag) {
        ; When %flag == 1, runs a 42-iteration loop that keeps four independent
        ; fma accumulators (all seeded with 0.0) fed from a vector loaded from %in.
        ; Otherwise the loop is skipped and an all-zero vector is stored to %out.
     17 entry:
     18   %cmpflag = icmp eq i32 %flag, 1
     19   br i1 %cmpflag, label %loop, label %exit
     20 
     21 loop:
     22   %c = phi i32 [0, %entry], [%cnext, %loop]          ; trip counter, starts at 0
     23   %v0 = phi float [0.0, %entry], [%fma.0, %loop]     ; accumulators: 0.0 on entry,
     24   %v1 = phi float [0.0, %entry], [%fma.1, %loop]     ; previous iteration's fma
     25   %v2 = phi float [0.0, %entry], [%fma.2, %loop]     ; result on the back edge
     26   %v3 = phi float [0.0, %entry], [%fma.3, %loop]
     27 
     28   ; Try to get the 0 constant to get coalesced into a wide register
        ; Only lane 0 of the stored vector is defined (from %v0); lanes 1-3 are undef.
     29   %blup = insertelement <4 x float> undef, float %v0, i32 0
     30   store <4 x float> %blup, <4 x float> addrspace(1)* %out
     31 
        ; Reload the input vector every iteration and split it into scalar lanes.
     32   %load = load <4 x float>, <4 x float> addrspace(1)* %in
     33   %load.0 = extractelement <4 x float> %load, i32 0
     34   %load.1 = extractelement <4 x float> %load, i32 1
     35   %load.2 = extractelement <4 x float> %load, i32 2
     36   %load.3 = extractelement <4 x float> %load, i32 3
        ; Per lane: %fma.N = %vN * %load.N + %vN
     37   %fma.0 = call float @llvm.fma.f32(float %v0, float %load.0, float %v0)
     38   %fma.1 = call float @llvm.fma.f32(float %v1, float %load.1, float %v1)
     39   %fma.2 = call float @llvm.fma.f32(float %v2, float %load.2, float %v2)
     40   %fma.3 = call float @llvm.fma.f32(float %v3, float %load.3, float %v3)
     41 
        ; Leave the loop once the incremented counter reaches 42.
     42   %cnext = add nsw i32 %c, 1
     43   %cmp = icmp eq i32 %cnext, 42
     44   br i1 %cmp, label %exit, label %loop
     45 
     46 exit:
        ; 0.0 when arriving directly from entry, otherwise the final fma results.
     47   %ev0 = phi float [0.0, %entry], [%fma.0, %loop]
     48   %ev1 = phi float [0.0, %entry], [%fma.1, %loop]
     49   %ev2 = phi float [0.0, %entry], [%fma.2, %loop]
     50   %ev3 = phi float [0.0, %entry], [%fma.3, %loop]
        ; Assemble the four scalars into one vector and write it to %out.
     51   %dst.0 = insertelement <4 x float> undef,  float %ev0, i32 0
     52   %dst.1 = insertelement <4 x float> %dst.0, float %ev1, i32 1
     53   %dst.2 = insertelement <4 x float> %dst.1, float %ev2, i32 2
     54   %dst.3 = insertelement <4 x float> %dst.2, float %ev3, i32 3
     55   store <4 x float> %dst.3, <4 x float> addrspace(1)* %out
     56   ret void
     57 }
     58