Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
      3 
      4 declare float @llvm.fma.f32(float, float, float) #1  ; used by the f32 fma tests in this file
      5 declare double @llvm.fma.f64(double, double, double) #1  ; used by @test_s0_s1_k_f64
      6 declare float @llvm.fmuladd.f32(float, float, float) #1  ; NOTE(review): no call to this intrinsic appears in the visible source
      7 declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1  ; used by @test_sgpr_use_twice_ternary_op_imm_a_a
      8 
      9 
     10 ; GCN-LABEL: {{^}}test_sgpr_use_twice_binop:
     11 ; GCN: s_load_dword [[SGPR:s[0-9]+]],
     12 ; GCN: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
     13 ; GCN: buffer_store_dword [[RESULT]]
     14 define void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 {
     15   %sum = fadd float %a, %a  ; the same argument supplies both addends
     16   store float %sum, float addrspace(1)* %out, align 4  ; write the sum to %out
     17   ret void
     18 }
     19 
     20 ; GCN-LABEL: {{^}}test_sgpr_use_three_ternary_op:
     21 ; GCN: s_load_dword [[SGPR:s[0-9]+]],
     22 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
     23 ; GCN: buffer_store_dword [[RESULT]]
     24 define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 {
     25   %result = call float @llvm.fma.f32(float %a, float %a, float %a) #1  ; %a fills all three fma operands
     26   store float %result, float addrspace(1)* %out, align 4  ; write the fma result to %out
     27   ret void
     28 }
     29 
     30 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
     31 ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
     32 ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
     33 ; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
     34 ; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
     35 ; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
     36 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
     37 ; GCN: buffer_store_dword [[RESULT]]
     38 define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 {
     39   %result = call float @llvm.fma.f32(float %a, float %a, float %b) #1  ; operands (a, a, b): %a is both multiplicands
     40   store float %result, float addrspace(1)* %out, align 4  ; write the fma result to %out
     41   ret void
     42 }
     43 
     44 ; GCN-LABEL: {{^}}test_use_s_v_s:
     45 ; GCN-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
     46 ; GCN-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
     47 
     48 ; GCN: buffer_load_dword [[VA0:v[0-9]+]]
     49 ; GCN-NOT: v_mov_b32
     50 ; GCN: buffer_load_dword [[VA1:v[0-9]+]]
     51 
     52 ; GCN-NOT: v_mov_b32
     53 ; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
     54 ; GCN-NOT: v_mov_b32
     55 
     56 ; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VA0]], [[SA]], [[VB]]
     57 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VA1]], [[SA]], [[VB]]
     58 ; GCN: buffer_store_dword [[RESULT0]]
     59 ; GCN: buffer_store_dword [[RESULT1]]
     60 define void @test_use_s_v_s(float addrspace(1)* %out, float %a, float %b, float addrspace(1)* %in) #0 {
     61   %loaded0 = load volatile float, float addrspace(1)* %in  ; first volatile load from %in
     62   %loaded1 = load volatile float, float addrspace(1)* %in  ; second volatile load from the same address
     63   %res0 = call float @llvm.fma.f32(float %a, float %loaded0, float %b) #1  ; operands (a, loaded0, b)
     64   %res1 = call float @llvm.fma.f32(float %a, float %loaded1, float %b) #1  ; operands (a, loaded1, b)
     65   store volatile float %res0, float addrspace(1)* %out  ; both results go to %out as volatile stores
     66   store volatile float %res1, float addrspace(1)* %out
     67   ret void
     68 }
     69 
     70 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
     71 ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
     72 ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
     73 ; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
     74 ; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
     75 ; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
     76 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
     77 ; GCN: buffer_store_dword [[RESULT]]
     78 define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
     79   %result = call float @llvm.fma.f32(float %a, float %b, float %a) #1  ; operands (a, b, a): %a is a multiplicand and the addend
     80   store float %result, float addrspace(1)* %out, align 4  ; write the fma result to %out
     81   ret void
     82 }
     83 
     84 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
     85 ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
     86 ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
     87 ; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
     88 ; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
     89 ; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
     90 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
     91 ; GCN: buffer_store_dword [[RESULT]]
     92 define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
     93   %result = call float @llvm.fma.f32(float %b, float %a, float %a) #1  ; operands (b, a, a): %a is the second multiplicand and the addend
     94   store float %result, float addrspace(1)* %out, align 4  ; write the fma result to %out
     95   ret void
     96 }
     97 
     98 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
     99 ; GCN: s_load_dword [[SGPR:s[0-9]+]]
    100 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
    101 ; GCN: buffer_store_dword [[RESULT]]
    102 define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 {
    103   %result = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1  ; operands (a, a, 2.0): constant addend
    104   store float %result, float addrspace(1)* %out, align 4  ; write the fma result to %out
    105   ret void
    106 }
    107 
    108 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
    109 ; GCN: s_load_dword [[SGPR:s[0-9]+]]
    110 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
    111 ; GCN: buffer_store_dword [[RESULT]]
    112 define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
    113   %result = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1  ; operands (a, 2.0, a): constant multiplicand
    114   store float %result, float addrspace(1)* %out, align 4  ; write the fma result to %out
    115   ret void
    116 }
    117 
    118 ; Use imad24 rather than fma for the (imm, a, a) operand order, since fma c, x, y is canonicalized to fma x, c, y.
    119 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
    120 ; GCN: s_load_dword [[SGPR:s[0-9]+]]
    121 ; GCN: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
    122 ; GCN: buffer_store_dword [[RESULT]]
    123 define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 {
    124   %mad = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1  ; integer imad24 with operands (2, a, a)
    125   store i32 %mad, i32 addrspace(1)* %out, align 4  ; write the i32 result to %out
    126   ret void
    127 }
    128 
    129 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_kimm:
    130 ; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
    131 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
    132 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[VK]]
    133 ; GCN: buffer_store_dword [[RESULT]]
    134 define void @test_sgpr_use_twice_ternary_op_a_a_kimm(float addrspace(1)* %out, float %a) #0 {
    135   %result = call float @llvm.fma.f32(float %a, float %a, float 1024.0) #1  ; operands (a, a, 1024.0): literal addend
    136   store float %result, float addrspace(1)* %out, align 4  ; write the fma result to %out
    137   ret void
    138 }
    139 
    140 ; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_k_s:
    141 ; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
    142 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
    143 ; GCN: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR]]
    144 ; GCN: buffer_store_dword [[RESULT0]]
    145 define void @test_literal_use_twice_ternary_op_k_k_s(float addrspace(1)* %out, float %a) #0 {
    146   %result = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) #1  ; operands (k, k, a): the 1024.0 literal is both multiplicands
    147   store float %result, float addrspace(1)* %out  ; write the fma result to %out
    148   ret void
    149 }
    150 
    151 ; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_k_s_x2:
    152 ; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
    153 ; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
    154 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
    155 ; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR0]]
    156 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VK]], [[VK]], [[SGPR1]]
    157 ; GCN: buffer_store_dword [[RESULT0]]
    158 ; GCN: buffer_store_dword [[RESULT1]]
    159 ; GCN: s_endpgm
    160 define void @test_literal_use_twice_ternary_op_k_k_s_x2(float addrspace(1)* %out, float %a, float %b) #0 {
    161   %first = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) #1  ; operands (k, k, a)
    162   %second = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %b) #1  ; operands (k, k, b), same literal as above
    163   store volatile float %first, float addrspace(1)* %out  ; both results go to %out as volatile stores
    164   store volatile float %second, float addrspace(1)* %out
    165   ret void
    166 }
    167 
    168 ; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_s_k:
    169 ; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
    170 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
    171 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[VK]], [[VK]]
    172 ; GCN: buffer_store_dword [[RESULT]]
    173 define void @test_literal_use_twice_ternary_op_k_s_k(float addrspace(1)* %out, float %a) #0 {
    174   %result = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) #1  ; operands (k, a, k): the literal is a multiplicand and the addend
    175   store float %result, float addrspace(1)* %out  ; write the fma result to %out
    176   ret void
    177 }
    178 
    179 ; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_s_k_x2:
    180 ; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
    181 ; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
    182 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
    183 ; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VK]], [[VK]]
    184 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR1]], [[VK]], [[VK]]
    185 ; GCN: buffer_store_dword [[RESULT0]]
    186 ; GCN: buffer_store_dword [[RESULT1]]
    187 ; GCN: s_endpgm
    188 define void @test_literal_use_twice_ternary_op_k_s_k_x2(float addrspace(1)* %out, float %a, float %b) #0 {
    189   %first = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) #1  ; operands (k, a, k)
    190   %second = call float @llvm.fma.f32(float 1024.0, float %b, float 1024.0) #1  ; operands (k, b, k), same literal as above
    191   store volatile float %first, float addrspace(1)* %out  ; both results go to %out as volatile stores
    192   store volatile float %second, float addrspace(1)* %out
    193   ret void
    194 }
    195 
    196 ; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_s_k_k:
    197 ; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
    198 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
    199 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[VK]], [[VK]]
    200 ; GCN: buffer_store_dword [[RESULT]]
    201 define void @test_literal_use_twice_ternary_op_s_k_k(float addrspace(1)* %out, float %a) #0 {
    202   %result = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) #1  ; operands (a, k, k): the literal is the second multiplicand and the addend
    203   store float %result, float addrspace(1)* %out  ; write the fma result to %out
    204   ret void
    205 }
    206 
    207 ; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_s_k_k_x2:
    208 ; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
    209 ; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
    210 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
    211 ; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VK]], [[VK]]
    212 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR1]], [[VK]], [[VK]]
    213 ; GCN: buffer_store_dword [[RESULT0]]
    214 ; GCN: buffer_store_dword [[RESULT1]]
    215 ; GCN: s_endpgm
    216 define void @test_literal_use_twice_ternary_op_s_k_k_x2(float addrspace(1)* %out, float %a, float %b) #0 {
    217   %first = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) #1  ; operands (a, k, k)
    218   %second = call float @llvm.fma.f32(float %b, float 1024.0, float 1024.0) #1  ; operands (b, k, k), same literal as above
    219   store volatile float %first, float addrspace(1)* %out  ; both results go to %out as volatile stores
    220   store volatile float %second, float addrspace(1)* %out
    221   ret void
    222 }
    223 
    224 ; GCN-LABEL: {{^}}test_s0_s1_k_f32:
    225 ; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
    226 ; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
    227 ; GCN-DAG: v_mov_b32_e32 [[VK0:v[0-9]+]], 0x44800000
    228 ; GCN-DAG: v_mov_b32_e32 [[VS1:v[0-9]+]], [[SGPR1]]
    229 
    230 ; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK0]]
    231 ; GCN-DAG: v_mov_b32_e32 [[VK1:v[0-9]+]], 0x45800000
    232 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK1]]
    233 
    234 ; GCN: buffer_store_dword [[RESULT0]]
    235 ; GCN: buffer_store_dword [[RESULT1]]
    236 define void @test_s0_s1_k_f32(float addrspace(1)* %out, float %a, float %b) #0 {
    237   %res_1k = call float @llvm.fma.f32(float %a, float %b, float 1024.0) #1  ; operands (a, b, 1024.0)
    238   %res_4k = call float @llvm.fma.f32(float %a, float %b, float 4096.0) #1  ; same multiplicands, addend 4096.0
    239   store volatile float %res_1k, float addrspace(1)* %out  ; both results go to %out as volatile stores
    240   store volatile float %res_4k, float addrspace(1)* %out
    241   ret void
    242 }
    243 
    244 ; FIXME: Immediate in SGPRs just copied to VGPRs
    245 ; GCN-LABEL: {{^}}test_s0_s1_k_f64:
    246 ; GCN-DAG: s_load_dwordx2 [[SGPR0:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
    247 ; GCN-DAG: s_load_dwordx2 s{{\[}}[[SGPR1_SUB0:[0-9]+]]:[[SGPR1_SUB1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xd|0x34}}
    248 ; GCN-DAG: v_mov_b32_e32 v[[VK0_SUB1:[0-9]+]], 0x40900000
    249 ; GCN-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0{{$}}
    250 
    251 ; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB0:[0-9]+]], s[[SGPR1_SUB0]]
    252 ; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB1:[0-9]+]], s[[SGPR1_SUB1]]
    253 ; GCN: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, [[SGPR0]], v{{\[}}[[VZERO]]:[[VK0_SUB1]]{{\]}}
    254 
    255 ; Same zero component is re-used for half of each immediate.
    256 ; GCN: v_mov_b32_e32 v[[VK1_SUB1:[0-9]+]], 0x40b00000
    257 ; GCN: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, [[SGPR0]], v{{\[}}[[VZERO]]:[[VK1_SUB1]]{{\]}}
    258 
    259 ; GCN: buffer_store_dwordx2 [[RESULT0]]
    260 ; GCN: buffer_store_dwordx2 [[RESULT1]]
    261 define void @test_s0_s1_k_f64(double addrspace(1)* %out, double %a, double %b) #0 {
    262   %res_1k = call double @llvm.fma.f64(double %a, double %b, double 1024.0) #1  ; f64 operands (a, b, 1024.0)
    263   %res_4k = call double @llvm.fma.f64(double %a, double %b, double 4096.0) #1  ; same multiplicands, addend 4096.0
    264   store volatile double %res_1k, double addrspace(1)* %out  ; both results go to %out as volatile stores
    265   store volatile double %res_4k, double addrspace(1)* %out
    266   ret void
    267 }
    268 
    269 attributes #0 = { nounwind }  ; attached to every function defined in this file
    270 attributes #1 = { nounwind readnone }  ; attached to the intrinsic declarations and to each intrinsic call site
    271