Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 
      3 declare i32 @llvm.amdgcn.workitem.id.x() #1  ; every test below calls this and uses the result as a buffer index
      4 declare float @llvm.fabs.f32(float) #1  ; absolute value; appears as the |reg| operand form in the expected output
      5 declare float @llvm.fma.f32(float, float, float) nounwind readnone  ; attributes spelled inline; same pair as group #1
      6 
      7 ; FUNC-LABEL: @commute_add_imm_fabs_f32
      8 ; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
      9 ; SI: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
     10 ; SI: buffer_store_dword [[REG]]
        ; Test input: out[0] = 2.0 + fabs(in[tid]), where tid comes from the workitem.id.x intrinsic.
        ; The directives above expect one v_add_f32_e64 whose first source operand is the constant
        ; 2.0 and whose second is the loaded register wrapped in |...|, followed by a store of
        ; that result register.
     11 define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
     12   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
     13   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid  ; address of in[tid]
     14   %x = load float, float addrspace(1)* %gep.0
     15   %x.fabs = call float @llvm.fabs.f32(float %x) #1
     16   %z = fadd float 2.0, %x.fabs  ; constant is the first IR operand, fabs result the second
     17   store float %z, float addrspace(1)* %out  ; stored through %out directly, not indexed by tid
     18   ret void
     19 }
     20 
     21 ; FUNC-LABEL: @commute_mul_imm_fneg_fabs_f32
     22 ; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
     23 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
     24 ; SI: buffer_store_dword [[REG]]
        ; Test input: out[0] = 4.0 * (-fabs(in[tid])).
        ; The directives above expect one v_mul_f32_e64 with -4.0 as the first source operand and
        ; only the |...| form on the loaded register: in the expected output the sign sits on the
        ; constant, not on the register operand.
     25 define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
     26   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
     27   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid  ; address of in[tid]
     28   %x = load float, float addrspace(1)* %gep.0
     29   %x.fabs = call float @llvm.fabs.f32(float %x) #1
     30   %x.fneg.fabs = fsub float -0.000000e+00, %x.fabs  ; negation written as (-0.0) - value
     31   %z = fmul float 4.0, %x.fneg.fabs  ; positive 4.0 in the IR; the expected output shows -4.0
     32   store float %z, float addrspace(1)* %out
     33   ret void
     34 }
     35 
     36 ; FUNC-LABEL: @commute_mul_imm_fneg_f32
     37 ; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
     38 ; SI: v_mul_f32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
     39 ; SI: buffer_store_dword [[REG]]
        ; Test input: out[0] = 4.0 * (-in[tid]).
        ; The directives above expect the _e32 form of v_mul_f32 with -4.0 as the first source
        ; operand and the loaded register with no |...| or leading minus on it.
     40 define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
     41   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
     42   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid  ; address of in[tid]
     43   %x = load float, float addrspace(1)* %gep.0
     44   %x.fneg = fsub float -0.000000e+00, %x  ; negation written as (-0.0) - value
     45   %z = fmul float 4.0, %x.fneg  ; positive 4.0 in the IR; the expected output shows -4.0
     46   store float %z, float addrspace(1)* %out
     47   ret void
     48 }
     49 
     50 ; FIXME: Should use SGPR for literal.
     51 ; FUNC-LABEL: @commute_add_lit_fabs_f32
     52 ; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
     53 ; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x44800000
     54 ; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, [[K]]
     55 ; SI: buffer_store_dword [[REG]]
        ; Test input: out[0] = 1024.0 + fabs(in[tid]).
        ; 0x44800000 is the IEEE-754 single-precision bit pattern of 1024.0. The directives above
        ; expect it to be moved into a v-register first, and the add to take |loaded value| as its
        ; first source operand and that register as its second.
     56 define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
     57   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
     58   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid  ; address of in[tid]
     59   %x = load float, float addrspace(1)* %gep.0
     60   %x.fabs = call float @llvm.fabs.f32(float %x) #1
     61   %z = fadd float 1024.0, %x.fabs  ; constant first in the IR; expected output has it second, via a register
     62   store float %z, float addrspace(1)* %out
     63   ret void
     64 }
     65 
     66 ; FUNC-LABEL: @commute_add_fabs_f32
     67 ; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
     68 ; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
     69 ; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[X]], |[[Y]]|
     70 ; SI: buffer_store_dword [[REG]]
        ; Test input: out[0] = in[tid] + fabs(in[tid + 1]), both inputs read with volatile loads.
        ; The two -DAG load directives may match in either order; X is the load without an offset
        ; and Y the one with offset:4. The add is expected to take X unmodified as its first
        ; source operand and |Y| as its second.
     71 define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
     72   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
     73   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid  ; address of in[tid]
     74   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1  ; next float after in[tid]
     75   %x = load volatile float, float addrspace(1)* %gep.0
     76   %y = load volatile float, float addrspace(1)* %gep.1
     77   %y.fabs = call float @llvm.fabs.f32(float %y) #1
     78   %z = fadd float %x, %y.fabs
     79   store float %z, float addrspace(1)* %out
     80   ret void
     81 }
     82 
     83 ; FUNC-LABEL: @commute_mul_fneg_f32
     84 ; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
     85 ; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
     86 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -[[Y]]
     87 ; SI: buffer_store_dword [[REG]]
        ; Test input: out[0] = in[tid] * (-in[tid + 1]), both inputs read with volatile loads.
        ; The multiply is expected to take X unmodified as its first source operand and Y with a
        ; leading minus as its second.
     88 define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
     89   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
     90   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid  ; address of in[tid]
     91   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1  ; next float after in[tid]
     92   %x = load volatile float, float addrspace(1)* %gep.0
     93   %y = load volatile float, float addrspace(1)* %gep.1
     94   %y.fneg = fsub float -0.000000e+00, %y  ; negation written as (-0.0) - value
     95   %z = fmul float %x, %y.fneg
     96   store float %z, float addrspace(1)* %out
     97   ret void
     98 }
     99 
    100 ; FUNC-LABEL: @commute_mul_fabs_fneg_f32
    101 ; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    102 ; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
    103 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -|[[Y]]|
    104 ; SI: buffer_store_dword [[REG]]
        ; Test input: out[0] = in[tid] * (-fabs(in[tid + 1])), both inputs read with volatile loads.
        ; The multiply is expected to take X unmodified as its first source operand and Y with
        ; both forms combined, -|Y|, as its second.
    105 define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
    106   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
    107   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid  ; address of in[tid]
    108   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1  ; next float after in[tid]
    109   %x = load volatile float, float addrspace(1)* %gep.0
    110   %y = load volatile float, float addrspace(1)* %gep.1
    111   %y.fabs = call float @llvm.fabs.f32(float %y) #1
    112   %y.fabs.fneg = fsub float -0.000000e+00, %y.fabs  ; negation written as (-0.0) - value
    113   %z = fmul float %x, %y.fabs.fneg
    114   store float %z, float addrspace(1)* %out
    115   ret void
    116 }
    117 
    118 ; There's no reason to commute this.
    119 ; FUNC-LABEL: @commute_mul_fabs_x_fabs_y_f32
    120 ; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    121 ; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
    122 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, |[[Y]]|
    123 ; SI: buffer_store_dword [[REG]]
        ; Test input: out[0] = fabs(in[tid]) * fabs(in[tid + 1]), both inputs read with volatile loads.
        ; Both multiply operands carry |...| in the expected output, and they appear in the same
        ; order as in the IR fmul (X first, Y second).
    124 define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
    125   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
    126   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid  ; address of in[tid]
    127   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1  ; next float after in[tid]
    128   %x = load volatile float, float addrspace(1)* %gep.0
    129   %y = load volatile float, float addrspace(1)* %gep.1
    130   %x.fabs = call float @llvm.fabs.f32(float %x) #1
    131   %y.fabs = call float @llvm.fabs.f32(float %y) #1
    132   %z = fmul float %x.fabs, %y.fabs
    133   store float %z, float addrspace(1)* %out
    134   ret void
    135 }
    136 
    137 ; FUNC-LABEL: @commute_mul_fabs_x_fneg_fabs_y_f32
    138 ; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    139 ; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
    140 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -|[[Y]]|
    141 ; SI: buffer_store_dword [[REG]]
        ; Test input: out[0] = fabs(in[tid]) * (-fabs(in[tid + 1])), both inputs read with volatile loads.
        ; The multiply is expected to take |X| as its first source operand and -|Y| as its second,
        ; the same operand order as the IR fmul.
    142 define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
    143   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
    144   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid  ; address of in[tid]
    145   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1  ; next float after in[tid]
    146   %x = load volatile float, float addrspace(1)* %gep.0
    147   %y = load volatile float, float addrspace(1)* %gep.1
    148   %x.fabs = call float @llvm.fabs.f32(float %x) #1
    149   %y.fabs = call float @llvm.fabs.f32(float %y) #1
    150   %y.fabs.fneg = fsub float -0.000000e+00, %y.fabs  ; negation written as (-0.0) - value
    151   %z = fmul float %x.fabs, %y.fabs.fneg
    152   store float %z, float addrspace(1)* %out
    153   ret void
    154 }
    155 
    156 ; Make sure we commute the multiply part for the constant in src0 even
    157 ; though we have negate modifier on src2.
        ; NOTE(review): the comment above and the function name both say "negate", but the IR below
        ; applies llvm.fabs to %r2 and the expected fma operand is |R2| with no minus sign.
        ; Confirm whether fabs or a negation was intended before changing either side.
    158 
    159 ; SI-LABEL: {{^}}fma_a_2.0_neg_b_f32
    160 ; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    161 ; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
    162 ; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, |[[R2]]|
    163 ; SI: buffer_store_dword [[RESULT]]
        ; Test input: out[tid] = fma(out[tid], 2.0, fabs(out[tid + 1])), inputs read with volatile loads.
        ; The expected v_fma_f32 lists R1, then the constant 2.0, then |R2|.
        ; NOTE(review): both loads and the store index %out; %in is not referenced in this body.
        ; Unlike the functions above, this definition carries no attribute group, and the fabs and
        ; fma call sites have no #1; verify whether that difference is deliberate.
    164 define void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
    165   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
    166   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid  ; address of out[tid]
    167   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1  ; next float after out[tid]
    168   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid  ; same base and index as %gep.0
    169 
    170   %r1 = load volatile float, float addrspace(1)* %gep.0
    171   %r2 = load volatile float, float addrspace(1)* %gep.1
    172 
    173   %r2.fabs = call float @llvm.fabs.f32(float %r2)
    174 
    175   %r3 = tail call float @llvm.fma.f32(float %r1, float 2.0, float %r2.fabs)  ; r1 * 2.0 + |r2|
    176   store float %r3, float addrspace(1)* %gep.out
    177   ret void
    178 }
    179 
    180 attributes #0 = { nounwind }  ; attached to the commute_* function definitions in this file
    181 attributes #1 = { nounwind readnone }  ; attached to the workitem.id.x / fabs declarations and their #1 call sites
    182