; Tests AMDGPU (amdgcn, SI) instruction selection for llvm.fmuladd and equivalent fadd patterns.
      1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
      2 
      3 declare float @llvm.fmuladd.f32(float, float, float)
      4 declare double @llvm.fmuladd.f64(double, double, double)
      5 declare i32 @llvm.r600.read.tidig.x() nounwind readnone
      6 declare float @llvm.fabs.f32(float) nounwind readnone
      7 
      8 ; CHECK-LABEL: {{^}}fmuladd_f32:
      9 ; CHECK: v_mac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
     10 
     11 define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
     12                          float addrspace(1)* %in2, float addrspace(1)* %in3) {
     13    %r0 = load float, float addrspace(1)* %in1
     14    %r1 = load float, float addrspace(1)* %in2
     15    %r2 = load float, float addrspace(1)* %in3
     16    %r3 = tail call float @llvm.fmuladd.f32(float %r0, float %r1, float %r2)
     17    store float %r3, float addrspace(1)* %out
     18    ret void
     19 }
     20 
     21 ; CHECK-LABEL: {{^}}fmuladd_f64:
     22 ; CHECK: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
     23 
     24 define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
     25                          double addrspace(1)* %in2, double addrspace(1)* %in3) {
     26    %r0 = load double, double addrspace(1)* %in1
     27    %r1 = load double, double addrspace(1)* %in2
     28    %r2 = load double, double addrspace(1)* %in3
     29    %r3 = tail call double @llvm.fmuladd.f64(double %r0, double %r1, double %r2)
     30    store double %r3, double addrspace(1)* %out
     31    ret void
     32 }
     33 
     34 ; CHECK-LABEL: {{^}}fmuladd_2.0_a_b_f32
     35 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
     36 ; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
     37 ; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
     38 ; CHECK: buffer_store_dword [[R2]]
     39 define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
     40   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
     41   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
     42   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
     43   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
     44 
     45   %r1 = load float, float addrspace(1)* %gep.0
     46   %r2 = load float, float addrspace(1)* %gep.1
     47 
     48   %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2)
     49   store float %r3, float addrspace(1)* %gep.out
     50   ret void
     51 }
     52 
     53 ; CHECK-LABEL: {{^}}fmuladd_a_2.0_b_f32
     54 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
     55 ; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
     56 ; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
     57 ; CHECK: buffer_store_dword [[R2]]
     58 define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
     59   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
     60   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
     61   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
     62   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
     63 
     64   %r1 = load float, float addrspace(1)* %gep.0
     65   %r2 = load float, float addrspace(1)* %gep.1
     66 
     67   %r3 = tail call float @llvm.fmuladd.f32(float %r1, float 2.0, float %r2)
     68   store float %r3, float addrspace(1)* %gep.out
     69   ret void
     70 }
     71 
     72 ; CHECK-LABEL: {{^}}fadd_a_a_b_f32:
     73 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
     74 ; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
     75 ; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
     76 ; CHECK: buffer_store_dword [[R2]]
     77 define void @fadd_a_a_b_f32(float addrspace(1)* %out,
     78                             float addrspace(1)* %in1,
     79                             float addrspace(1)* %in2) {
     80   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
     81   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
     82   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
     83   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
     84 
     85   %r0 = load float, float addrspace(1)* %gep.0
     86   %r1 = load float, float addrspace(1)* %gep.1
     87 
     88   %add.0 = fadd float %r0, %r0
     89   %add.1 = fadd float %add.0, %r1
     90   store float %add.1, float addrspace(1)* %out
     91   ret void
     92 }
     93 
     94 ; CHECK-LABEL: {{^}}fadd_b_a_a_f32:
     95 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
     96 ; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
     97 ; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
     98 ; CHECK: buffer_store_dword [[R2]]
     99 define void @fadd_b_a_a_f32(float addrspace(1)* %out,
    100                             float addrspace(1)* %in1,
    101                             float addrspace(1)* %in2) {
    102   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
    103   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
    104   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
    105   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
    106 
    107   %r0 = load float, float addrspace(1)* %gep.0
    108   %r1 = load float, float addrspace(1)* %gep.1
    109 
    110   %add.0 = fadd float %r0, %r0
    111   %add.1 = fadd float %r1, %add.0
    112   store float %add.1, float addrspace(1)* %out
    113   ret void
    114 }
    115 
    116 ; CHECK-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32
    117 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    118 ; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
    119 ; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
    120 ; CHECK: buffer_store_dword [[R2]]
    121 define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
    122   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
    123   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
    124   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
    125   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
    126 
    127   %r1 = load float, float addrspace(1)* %gep.0
    128   %r2 = load float, float addrspace(1)* %gep.1
    129 
    130   %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1, float %r2)
    131   store float %r3, float addrspace(1)* %gep.out
    132   ret void
    133 }
    134 
    135 
    136 ; CHECK-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32
    137 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    138 ; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
    139 ; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
    140 ; CHECK: buffer_store_dword [[R2]]
    141 define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
    142   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
    143   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
    144   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
    145   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
    146 
    147   %r1 = load float, float addrspace(1)* %gep.0
    148   %r2 = load float, float addrspace(1)* %gep.1
    149 
    150   %r1.fneg = fsub float -0.000000e+00, %r1
    151 
    152   %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1.fneg, float %r2)
    153   store float %r3, float addrspace(1)* %gep.out
    154   ret void
    155 }
    156 
    157 
    158 ; CHECK-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32
    159 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    160 ; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
    161 ; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
    162 ; CHECK: buffer_store_dword [[R2]]
    163 define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
    164   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
    165   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
    166   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
    167   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
    168 
    169   %r1 = load float, float addrspace(1)* %gep.0
    170   %r2 = load float, float addrspace(1)* %gep.1
    171 
    172   %r1.fneg = fsub float -0.000000e+00, %r1
    173 
    174   %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1.fneg, float %r2)
    175   store float %r3, float addrspace(1)* %gep.out
    176   ret void
    177 }
    178 
    179 
    180 ; CHECK-LABEL: {{^}}fmuladd_2.0_a_neg_b_f32
    181 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    182 ; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
    183 ; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
    184 ; CHECK: buffer_store_dword [[RESULT]]
    185 define void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
    186   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
    187   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
    188   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
    189   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
    190 
    191   %r1 = load float, float addrspace(1)* %gep.0
    192   %r2 = load float, float addrspace(1)* %gep.1
    193 
    194   %r2.fneg = fsub float -0.000000e+00, %r2
    195 
    196   %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2.fneg)
    197   store float %r3, float addrspace(1)* %gep.out
    198   ret void
    199 }
    200