Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
      3 
      4 declare float @llvm.fma.f32(float, float, float) #1 ; f32 fused multiply-add intrinsic, called by most kernels below
      5 declare double @llvm.fma.f64(double, double, double) #1 ; f64 fused multiply-add intrinsic, called by test_s0_s1_k_f64
      6 declare float @llvm.fmuladd.f32(float, float, float) #1 ; declared but not called anywhere in the visible part of this file
      7 declare float @llvm.amdgcn.div.fixup.f32(float, float, float) #1 ; three-operand AMDGPU intrinsic, called by the imm_a_a kernel
      8 
      9 
     10 ; GCN-LABEL: {{^}}test_sgpr_use_twice_binop:
     11 ; GCN: s_load_dword [[SGPR:s[0-9]+]],
     12 ; GCN: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
     13 ; GCN: buffer_store_dword [[RESULT]]
     14 define amdgpu_kernel void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 { ; kernel that stores %a + %a to %out
     15   %dbl = fadd float %a, %a ; both operands are the same kernel argument; the checks above expect one v_add_f32_e64 reading a single SGPR twice
     16   store float %dbl, float addrspace(1)* %out, align 4
     17   ret void
     18 }
     19 
     20 ; GCN-LABEL: {{^}}test_sgpr_use_three_ternary_op:
     21 ; GCN: s_load_dword [[SGPR:s[0-9]+]],
     22 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
     23 ; GCN: buffer_store_dword [[RESULT]]
     24 define amdgpu_kernel void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 { ; kernel that stores fma(%a, %a, %a) to %out
     25   %fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1 ; same argument in all three operands; the checks above expect one SGPR in every v_fma_f32 source
     26   store float %fma, float addrspace(1)* %out, align 4
     27   ret void
     28 }
     29 
     30 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
     31 ; SI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
     32 ; VI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
     33 ; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s[[SGPR1]]
     34 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], s[[SGPR0]], s[[SGPR0]], [[VGPR1]]
     35 ; GCN: buffer_store_dword [[RESULT]]
     36 define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 { ; kernel that stores fma(%a, %a, %b) to %out
     37   %fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1 ; %a is both multiplicands, %b the addend; the checks above expect %b moved to a VGPR and %a read from one SGPR twice
     38   store float %fma, float addrspace(1)* %out, align 4
     39   ret void
     40 }
     41 
     42 ; GCN-LABEL: {{^}}test_use_s_v_s:
     43 ; GCN-DAG: s_load_dwordx2 s{{\[}}[[SA:[0-9]+]]:[[SB:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
     44 ; SI: buffer_load_dword [[VA0:v[0-9]+]]
     45 ; SI-NEXT: buffer_load_dword [[VA1:v[0-9]+]]
     46 
     47 ; GCN-NOT: v_mov_b32
     48 
     49 ; VI: buffer_load_dword [[VA0:v[0-9]+]]
     50 ; VI-NEXT: buffer_load_dword [[VA1:v[0-9]+]]
     51 
     52 ; GCN-NOT: v_mov_b32
     53 ; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], s[[SB]]
     54 ; GCN-NOT: v_mov_b32
     55 
     56 ; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], s[[SA]], [[VA0]], [[VB]]
     57 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], s[[SA]], [[VA1]], [[VB]]
     58 ; GCN: buffer_store_dword [[RESULT0]]
     59 ; GCN: buffer_store_dword [[RESULT1]]
     60 define amdgpu_kernel void @test_use_s_v_s(float addrspace(1)* %out, float %a, float %b, float addrspace(1)* %in) #0 { ; kernel that computes two fmas mixing kernel arguments with values loaded from %in
     61   %va0 = load volatile float, float addrspace(1)* %in ; volatile, so this load and the next one of the same address are separate accesses
     62   %va1 = load volatile float, float addrspace(1)* %in
     63   %fma0 = call float @llvm.fma.f32(float %a, float %va0, float %b) #1 ; argument, loaded value, argument; the checks above expect only %b to need a v_mov_b32 copy
     64   %fma1 = call float @llvm.fma.f32(float %a, float %va1, float %b) #1 ; same shape with the second loaded value; the checks expect the same %b copy to be reused
     65   store volatile float %fma0, float addrspace(1)* %out ; volatile so both stores to the same address are emitted in this order
     66   store volatile float %fma1, float addrspace(1)* %out
     67   ret void
     68 }
     69 
     70 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
     71 ; SI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
     72 ; VI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
     73 ; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s[[SGPR1]]
     74 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], s[[SGPR0]], [[VGPR1]], s[[SGPR0]]
     75 ; GCN: buffer_store_dword [[RESULT]]
     76 define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 { ; kernel that stores fma(%a, %b, %a) to %out
     77   %fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1 ; %a is the first multiplicand and the addend; the checks above expect %b in a VGPR as the middle source
     78   store float %fma, float addrspace(1)* %out, align 4
     79   ret void
     80 }
     81 
     82 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
     83 ; SI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
     84 ; VI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
     85 ; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s[[SGPR1]]
     86 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], s[[SGPR0]], s[[SGPR0]]
     87 ; GCN: buffer_store_dword [[RESULT]]
     88 define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 { ; kernel that stores fma(%b, %a, %a) to %out
     89   %fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1 ; %a is the second multiplicand and the addend; the checks above expect %b in a VGPR as the first source
     90   store float %fma, float addrspace(1)* %out, align 4
     91   ret void
     92 }
     93 
     94 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
     95 ; GCN: s_load_dword [[SGPR:s[0-9]+]]
     96 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
     97 ; GCN: buffer_store_dword [[RESULT]]
     98 define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 { ; kernel that stores fma(%a, %a, 2.0) to %out
     99   %fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1 ; the checks above expect 2.0 to appear directly as the third v_fma_f32 source, with no v_mov
    100   store float %fma, float addrspace(1)* %out, align 4
    101   ret void
    102 }
    103 
    104 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
    105 ; GCN: s_load_dword [[SGPR:s[0-9]+]]
    106 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], 2.0, [[SGPR]]
    107 ; GCN: buffer_store_dword [[RESULT]]
    108 define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 { ; kernel that stores fma(%a, 2.0, %a) to %out
    109   %fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1 ; the checks above expect 2.0 to appear directly as the middle v_fma_f32 source
    110   store float %fma, float addrspace(1)* %out, align 4
    111   ret void
    112 }
    113 
    114 ; Use div_fixup instead of fma here, since fma c, x, y is canonicalized to fma x, c, y
    115 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
    116 ; GCN: s_load_dword [[SGPR:s[0-9]+]]
    117 ; GCN: v_div_fixup_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
    118 ; GCN: buffer_store_dword [[RESULT]]
    119 define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_imm_a_a(float addrspace(1)* %out, float %a) #0 { ; kernel that stores div.fixup(2.0, %a, %a) to %out
    120   %val = call float @llvm.amdgcn.div.fixup.f32(float 2.0, float %a, float %a) #1 ; constant in the first operand; the checks above expect v_div_fixup_f32 with 2.0 first and one SGPR in the other two sources
    121   store float %val, float addrspace(1)* %out, align 4
    122   ret void
    123 }
    124 
    125 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_kimm:
    126 ; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
    127 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
    128 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[VK]]
    129 ; GCN: buffer_store_dword [[RESULT]]
    130 define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_kimm(float addrspace(1)* %out, float %a) #0 { ; kernel that stores fma(%a, %a, 1024.0) to %out
    131   %fma = call float @llvm.fma.f32(float %a, float %a, float 1024.0) #1 ; 1024.0 is 0x44800000 as f32; the checks above expect it moved into a VGPR before the v_fma_f32
    132   store float %fma, float addrspace(1)* %out, align 4
    133   ret void
    134 }
    135 
    136 ; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_k_s:
    137 ; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
    138 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
    139 ; GCN: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR]]
    140 ; GCN: buffer_store_dword [[RESULT0]]
    141 define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_k_s(float addrspace(1)* %out, float %a) #0 { ; kernel that stores fma(1024.0, 1024.0, %a) to %out
    142   %fma = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) #1 ; same constant in both multiplicands; the checks above expect one VGPR holding 0x44800000 used for both
    143   store float %fma, float addrspace(1)* %out
    144   ret void
    145 }
    146 
    147 ; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_k_s_x2:
    148 ; SI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
    149 ; VI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
    150 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
    151 ; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], s[[SGPR0]]
    152 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VK]], [[VK]], s[[SGPR1]]
    153 ; GCN: buffer_store_dword [[RESULT0]]
    154 ; GCN: buffer_store_dword [[RESULT1]]
    155 ; GCN: s_endpgm
    156 define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_k_s_x2(float addrspace(1)* %out, float %a, float %b) #0 { ; two-fma variant of the k_k_s kernel, with %a and %b as the addends
    157   %fma0 = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) #1 ; the checks above expect both fmas to share a single VGPR holding 0x44800000
    158   %fma1 = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %b) #1
    159   store volatile float %fma0, float addrspace(1)* %out ; volatile so both stores to the same address are emitted in this order
    160   store volatile float %fma1, float addrspace(1)* %out
    161   ret void
    162 }
    163 
    164 ; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_s_k:
    165 ; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
    166 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
    167 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[VK]], [[VK]]
    168 ; GCN: buffer_store_dword [[RESULT]]
    169 define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_s_k(float addrspace(1)* %out, float %a) #0 { ; kernel that stores fma(1024.0, %a, 1024.0) to %out
    170   %fma = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) #1 ; constant written first here, but the checks above expect the SGPR as the first source and the constant VGPR in the other two
    171   store float %fma, float addrspace(1)* %out
    172   ret void
    173 }
    174 
    175 ; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_s_k_x2:
    176 ; SI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
    177 ; VI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
    178 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
    179 ; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], s[[SGPR0]], [[VK]], [[VK]]
    180 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], s[[SGPR1]], [[VK]], [[VK]]
    181 ; GCN: buffer_store_dword [[RESULT0]]
    182 ; GCN: buffer_store_dword [[RESULT1]]
    183 ; GCN: s_endpgm
    184 define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_s_k_x2(float addrspace(1)* %out, float %a, float %b) #0 { ; two-fma variant of the k_s_k kernel, with %a and %b as the middle operands
    185   %fma0 = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) #1 ; the checks above expect both fmas to share a single VGPR holding 0x44800000
    186   %fma1 = call float @llvm.fma.f32(float 1024.0, float %b, float 1024.0) #1
    187   store volatile float %fma0, float addrspace(1)* %out ; volatile so both stores to the same address are emitted in this order
    188   store volatile float %fma1, float addrspace(1)* %out
    189   ret void
    190 }
    191 
    192 ; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_s_k_k:
    193 ; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
    194 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
    195 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[VK]], [[VK]]
    196 ; GCN: buffer_store_dword [[RESULT]]
    197 define amdgpu_kernel void @test_literal_use_twice_ternary_op_s_k_k(float addrspace(1)* %out, float %a) #0 { ; kernel that stores fma(%a, 1024.0, 1024.0) to %out
    198   %fma = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) #1 ; same constant as second multiplicand and addend; the checks above expect one VGPR holding 0x44800000 for both
    199   store float %fma, float addrspace(1)* %out
    200   ret void
    201 }
    202 
    203 ; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_s_k_k_x2:
    204 ; SI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
    205 ; VI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
    206 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
    207 ; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], s[[SGPR0]], [[VK]], [[VK]]
    208 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], s[[SGPR1]], [[VK]], [[VK]]
    209 ; GCN: buffer_store_dword [[RESULT0]]
    210 ; GCN: buffer_store_dword [[RESULT1]]
    211 ; GCN: s_endpgm
    212 define amdgpu_kernel void @test_literal_use_twice_ternary_op_s_k_k_x2(float addrspace(1)* %out, float %a, float %b) #0 { ; two-fma variant of the s_k_k kernel, with %a and %b as the first operands
    213   %fma0 = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) #1 ; the checks above expect both fmas to share a single VGPR holding 0x44800000
    214   %fma1 = call float @llvm.fma.f32(float %b, float 1024.0, float 1024.0) #1
    215   store volatile float %fma0, float addrspace(1)* %out ; volatile so both stores to the same address are emitted in this order
    216   store volatile float %fma1, float addrspace(1)* %out
    217   ret void
    218 }
    219 
    220 ; GCN-LABEL: {{^}}test_s0_s1_k_f32:
    221 ; SI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
    222 ; VI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
    223 ; GCN-DAG: v_mov_b32_e32 [[VK0:v[0-9]+]], 0x44800000
    224 ; GCN-DAG: v_mov_b32_e32 [[VS1:v[0-9]+]], s[[SGPR1]]
    225 
    226 ; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], s[[SGPR0]], [[VS1]], [[VK0]]
    227 ; GCN-DAG: v_mov_b32_e32 [[VK1:v[0-9]+]], 0x45800000
    228 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], s[[SGPR0]], [[VS1]], [[VK1]]
    229 
    230 ; GCN: buffer_store_dword [[RESULT0]]
    231 ; GCN: buffer_store_dword [[RESULT1]]
    232 define amdgpu_kernel void @test_s0_s1_k_f32(float addrspace(1)* %out, float %a, float %b) #0 { ; kernel with two fmas of %a * %b that differ only in the constant addend
    233   %fma0 = call float @llvm.fma.f32(float %a, float %b, float 1024.0) #1 ; 1024.0 is 0x44800000; the checks above expect %b and the constant each moved to a VGPR
    234   %fma1 = call float @llvm.fma.f32(float %a, float %b, float 4096.0) #1 ; 4096.0 is 0x45800000; the checks expect the VGPR copy of %b to be reused here
    235   store volatile float %fma0, float addrspace(1)* %out ; volatile so both stores to the same address are emitted in this order
    236   store volatile float %fma1, float addrspace(1)* %out
    237   ret void
    238 }
    239 
    240 ; FIXME: Immediate in SGPRs just copied to VGPRs
    241 ; GCN-LABEL: {{^}}test_s0_s1_k_f64:
    242 ; GCN-DAG: s_load_dwordx2 [[SGPR0:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
    243 ; GCN-DAG: s_load_dwordx2 s{{\[}}[[SGPR1_SUB0:[0-9]+]]:[[SGPR1_SUB1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0x1d|0x74}}
    244 ; GCN-DAG: v_mov_b32_e32 v[[VK0_SUB1:[0-9]+]], 0x40900000
    245 ; GCN-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0{{$}}
    246 
    247 ; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB0:[0-9]+]], s[[SGPR1_SUB0]]
    248 ; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB1:[0-9]+]], s[[SGPR1_SUB1]]
    249 ; GCN: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, v{{\[}}[[VZERO]]:[[VK0_SUB1]]{{\]}}
    250 
    251 ; Same zero component is re-used for half of each immediate.
    252 ; GCN: v_mov_b32_e32 v[[VK1_SUB1:[0-9]+]], 0x40b00000
    253 ; GCN: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, v{{\[}}[[VZERO]]:[[VK1_SUB1]]{{\]}}
    254 
    255 ; GCN: buffer_store_dwordx2 [[RESULT0]]
    256 ; GCN: buffer_store_dwordx2 [[RESULT1]]
    257 define amdgpu_kernel void @test_s0_s1_k_f64(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) #0 { ; f64 version of test_s0_s1_k_f32; the unnamed [8 x i32] arguments are unused, presumably padding so %a and %b get separate loads (TODO confirm)
    258   %fma0 = call double @llvm.fma.f64(double %a, double %b, double 1024.0) #1 ; 1024.0 as f64 has high dword 0x40900000 and low dword 0, matching the v_mov checks above
    259   %fma1 = call double @llvm.fma.f64(double %a, double %b, double 4096.0) #1 ; 4096.0 as f64 has high dword 0x40b00000; the checks expect the zero low dword register to be shared with the first constant
    260   store volatile double %fma0, double addrspace(1)* %out ; volatile so both stores to the same address are emitted in this order
    261   store volatile double %fma1, double addrspace(1)* %out
    262   ret void
    263 }
    264 
    265 attributes #0 = { nounwind } ; attached to every kernel definition in the visible part of this file
    266 attributes #1 = { nounwind readnone } ; attached to the intrinsic declarations and to each intrinsic call site
    267