Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
      2 ; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
      3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      4 
      5 ; FIXME: The SI-NONAN run should use a no-signed-zeros option instead of -enable-unsafe-fp-math.
      6 ; Intrinsic called by every test below; its i32 result (%tid) is used as the element index into %in.
      7 declare i32 @llvm.r600.read.tidig.x() #1
      8 ; select(fcmp uge a, b), a, b. The default amdgcn run expects v_max_legacy_f32, the no-NaNs/unsafe-math run expects v_max_f32, and the r600 run expects MAX.
      9 ; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32:
     10 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
     11 ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
     12 ; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
     13 ; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
     14 
     15 ; EG: MAX
     16 define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
     17   %tid = call i32 @llvm.r600.read.tidig.x() #1
     18   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
     19   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
     20   ; %a is in[tid] and %b is in[tid + 1]; the loads are volatile, presumably so both stay separate and in this order for the checks.
     21   %a = load volatile float, float addrspace(1)* %gep.0, align 4
     22   %b = load volatile float, float addrspace(1)* %gep.1, align 4
     23   ; Pattern under test, an unordered-or-greater-equal compare selecting between its own operands.
     24   %cmp = fcmp uge float %a, %b
     25   %val = select i1 %cmp, float %a, float %b
     26   store float %val, float addrspace(1)* %out, align 4
     27   ret void
     28 }
     29 ; select(fcmp oge a, b), a, b. The default amdgcn run expects v_max_legacy_f32 with operands A, B; the no-NaNs/unsafe-math run expects v_max_f32; the r600 run expects MAX.
     30 ; FUNC-LABEL: {{^}}test_fmax_legacy_oge_f32:
     31 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
     32 ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
     33 ; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
     34 ; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
     35 ; EG: MAX
     36 define void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
     37   %tid = call i32 @llvm.r600.read.tidig.x() #1
     38   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
     39   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
     40   ; %a is in[tid] and %b is in[tid + 1]; the loads are volatile, presumably so both stay separate and in this order for the checks.
     41   %a = load volatile float, float addrspace(1)* %gep.0, align 4
     42   %b = load volatile float, float addrspace(1)* %gep.1, align 4
     43   ; Pattern under test, an ordered greater-or-equal compare selecting between its own operands.
     44   %cmp = fcmp oge float %a, %b
     45   %val = select i1 %cmp, float %a, float %b
     46   store float %val, float addrspace(1)* %out, align 4
     47   ret void
     48 }
     49 ; select(fcmp ugt a, b), a, b. The default amdgcn run expects v_max_legacy_f32 with operands B, A; the no-NaNs/unsafe-math run expects v_max_f32; the r600 run expects MAX.
     50 ; FUNC-LABEL: {{^}}test_fmax_legacy_ugt_f32:
     51 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
     52 ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
     53 ; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
     54 ; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
     55 ; EG: MAX
     56 define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
     57   %tid = call i32 @llvm.r600.read.tidig.x() #1
     58   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
     59   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
     60   ; %a is in[tid] and %b is in[tid + 1]; the loads are volatile, presumably so both stay separate and in this order for the checks.
     61   %a = load volatile float, float addrspace(1)* %gep.0, align 4
     62   %b = load volatile float, float addrspace(1)* %gep.1, align 4
     63   ; Pattern under test, an unordered-or-greater-than compare selecting between its own operands.
     64   %cmp = fcmp ugt float %a, %b
     65   %val = select i1 %cmp, float %a, float %b
     66   store float %val, float addrspace(1)* %out, align 4
     67   ret void
     68 }
     69 ; select(fcmp ogt a, b), a, b. The default amdgcn run expects v_max_legacy_f32 with operands A, B; the no-NaNs/unsafe-math run expects v_max_f32; the r600 run expects MAX.
     70 ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32:
     71 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
     72 ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
     73 ; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
     74 ; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
     75 ; EG: MAX
     76 define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
     77   %tid = call i32 @llvm.r600.read.tidig.x() #1
     78   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
     79   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
     80   ; %a is in[tid] and %b is in[tid + 1]; the loads are volatile, presumably so both stay separate and in this order for the checks.
     81   %a = load volatile float, float addrspace(1)* %gep.0, align 4
     82   %b = load volatile float, float addrspace(1)* %gep.1, align 4
     83   ; Pattern under test, an ordered greater-than compare selecting between its own operands.
     84   %cmp = fcmp ogt float %a, %b
     85   %val = select i1 %cmp, float %a, float %b
     86   store float %val, float addrspace(1)* %out, align 4
     87   ret void
     88 }
     89 ; Same ogt compare-and-select pattern on <1 x float>; the checks expect the same instructions as the scalar ogt test.
     90 ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32:
     91 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
     92 ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
     93 ; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
     94 ; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
     95 ; EG: MAX
     96 define void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
     97   %tid = call i32 @llvm.r600.read.tidig.x() #1
     98   %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
     99   %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1
    100   ; %a is in[tid] and %b is in[tid + 1]; unlike the scalar tests, these loads are not volatile and carry no explicit alignment.
    101   %a = load <1 x float>, <1 x float> addrspace(1)* %gep.0
    102   %b = load <1 x float>, <1 x float> addrspace(1)* %gep.1
    103   ; Pattern under test, an ordered greater-than compare selecting between its own operands.
    104   %cmp = fcmp ogt <1 x float> %a, %b
    105   %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
    106   store <1 x float> %val, <1 x float> addrspace(1)* %out
    107   ret void
    108 }
    109 ; Same ogt compare-and-select pattern on <3 x float>; three max instructions are expected per amdgcn run. Operands, loads and the r600 output are not checked here.
    110 ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32:
    111 ; SI-SAFE: v_max_legacy_f32_e32
    112 ; SI-SAFE: v_max_legacy_f32_e32
    113 ; SI-SAFE: v_max_legacy_f32_e32
    114 ; SI-NONAN: v_max_f32_e32
    115 ; SI-NONAN: v_max_f32_e32
    116 ; SI-NONAN: v_max_f32_e32
    117 define void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
    118   %tid = call i32 @llvm.r600.read.tidig.x() #1
    119   %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
    120   %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1
    121   ; %a is in[tid] and %b is in[tid + 1]; these loads are not volatile and carry no explicit alignment.
    122   %a = load <3 x float>, <3 x float> addrspace(1)* %gep.0
    123   %b = load <3 x float>, <3 x float> addrspace(1)* %gep.1
    124   ; Pattern under test, an element-wise ordered greater-than compare selecting between its own operands.
    125   %cmp = fcmp ogt <3 x float> %a, %b
    126   %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
    127   store <3 x float> %val, <3 x float> addrspace(1)* %out
    128   ret void
    129 }
    130 ; The compare result has a second use (it is stored to %out1). Both amdgcn runs expect no v_max_* at all, only v_cmp_gt_f32 directly followed by v_cndmask_b32; the r600 run still expects MAX.
    131 ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32_multi_use:
    132 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    133 ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
    134 ; SI-NOT: v_max_
    135 ; SI: v_cmp_gt_f32
    136 ; SI-NEXT: v_cndmask_b32
    137 ; SI-NOT: v_max_
    138 
    139 ; EG: MAX
    140 define void @test_fmax_legacy_ogt_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
    141   %tid = call i32 @llvm.r600.read.tidig.x() #1
    142   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
    143   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
    144   ; %a is in[tid] and %b is in[tid + 1]; the loads are volatile, presumably so both stay separate and in this order for the checks.
    145   %a = load volatile float, float addrspace(1)* %gep.0, align 4
    146   %b = load volatile float, float addrspace(1)* %gep.1, align 4
    147   ; %cmp feeds both the select and the i1 store below, which is the extra use this test is about.
    148   %cmp = fcmp ogt float %a, %b
    149   %val = select i1 %cmp, float %a, float %b
    150   store float %val, float addrspace(1)* %out0, align 4
    151   store i1 %cmp, i1 addrspace(1)* %out1
    152   ret void
    153 }
    154 ; Attribute groups used above; #0 is on every test function, #1 is on the tidig.x intrinsic declaration and its call sites.
    155 attributes #0 = { nounwind }
    156 attributes #1 = { nounwind readnone }
    157