Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      3 
      4 declare float @llvm.maxnum.f32(float, float) #0 ; llvm.maxnum overloads called by the tests below
      5 declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0
      6 declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #0
      7 declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #0
      8 declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #0
      9 ; The f64 overload is declared but not called by any function visible in this file.
     10 declare double @llvm.maxnum.f64(double, double) #0
     11 
     12 ; FUNC-LABEL: @test_fmax_f32
     13 ; SI: v_max_f32_e32
     14 
     15 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
     16 ; EG: MAX_DX10 {{.*}}[[OUT]]
     17 define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
     18   %max = call float @llvm.maxnum.f32(float %a, float %b) #0 ; scalar maxnum of both float arguments
     19   store float %max, float addrspace(1)* %out, align 4 ; write the result through %out
     20   ret void
     21 }
     22 
     23 ; FUNC-LABEL: @test_fmax_v2f32
     24 ; SI: v_max_f32_e32
     25 ; SI: v_max_f32_e32
     26 
     27 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
     28 ; EG: MAX_DX10 {{.*}}[[OUT]]
     29 ; EG: MAX_DX10 {{.*}}[[OUT]]
     30 define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
     31   %max = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b) #0 ; 2-element vector maxnum
     32   store <2 x float> %max, <2 x float> addrspace(1)* %out, align 8 ; write the vector result through %out
     33   ret void
     34 }
     35 
     36 ; FUNC-LABEL: @test_fmax_v4f32
     37 ; SI: v_max_f32_e32
     38 ; SI: v_max_f32_e32
     39 ; SI: v_max_f32_e32
     40 ; SI: v_max_f32_e32
     41 
     42 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
     43 ; EG: MAX_DX10 {{.*}}[[OUT]]
     44 ; EG: MAX_DX10 {{.*}}[[OUT]]
     45 ; EG: MAX_DX10 {{.*}}[[OUT]]
     46 ; EG: MAX_DX10 {{.*}}[[OUT]]
     47 define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
     48   %max = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) #0 ; 4-element vector maxnum
     49   store <4 x float> %max, <4 x float> addrspace(1)* %out, align 16 ; write the vector result through %out
     50   ret void
     51 }
     52 
     53 ; FUNC-LABEL: @test_fmax_v8f32
     54 ; SI: v_max_f32_e32
     55 ; SI: v_max_f32_e32
     56 ; SI: v_max_f32_e32
     57 ; SI: v_max_f32_e32
     58 ; SI: v_max_f32_e32
     59 ; SI: v_max_f32_e32
     60 ; SI: v_max_f32_e32
     61 ; SI: v_max_f32_e32
     62 
     63 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
     64 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
     65 ; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X
     66 ; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y
     67 ; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z
     68 ; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W
     69 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X
     70 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
     71 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
     72 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
     73 define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
     74   %max = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b) #0 ; 8-element vector maxnum
     75   store <8 x float> %max, <8 x float> addrspace(1)* %out, align 32 ; write the vector result through %out
     76   ret void
     77 }
     78 
     79 ; FUNC-LABEL: @test_fmax_v16f32
     80 ; SI: v_max_f32_e32
     81 ; SI: v_max_f32_e32
     82 ; SI: v_max_f32_e32
     83 ; SI: v_max_f32_e32
     84 ; SI: v_max_f32_e32
     85 ; SI: v_max_f32_e32
     86 ; SI: v_max_f32_e32
     87 ; SI: v_max_f32_e32
     88 ; SI: v_max_f32_e32
     89 ; SI: v_max_f32_e32
     90 ; SI: v_max_f32_e32
     91 ; SI: v_max_f32_e32
     92 ; SI: v_max_f32_e32
     93 ; SI: v_max_f32_e32
     94 ; SI: v_max_f32_e32
     95 ; SI: v_max_f32_e32
     96 
     97 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
     98 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
     99 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]]
    100 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]]
    101 ; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X
    102 ; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y
    103 ; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z
    104 ; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W
    105 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X
    106 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
    107 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
    108 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
    109 ; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].X
    110 ; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Y
    111 ; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Z
    112 ; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].W
    113 ; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].X
    114 ; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Y
    115 ; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Z
    116 ; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].W
    117 define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
    118   %max = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b) #0 ; 16-element vector maxnum
    119   store <16 x float> %max, <16 x float> addrspace(1)* %out, align 64 ; write the vector result through %out
    120   ret void
    121 }
    122 
    123 ; FUNC-LABEL: @constant_fold_fmax_f32
    124 ; SI-NOT: v_max_f32_e32
    125 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
    126 ; SI: buffer_store_dword [[REG]]
    127 
    128 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
    129 ; EG-NOT: MAX_DX10
    130 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
    131 define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind {
    132   %folded = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0 ; both operands are constants
    133   store float %folded, float addrspace(1)* %out, align 4 ; write the result through %out
    134   ret void
    135 }
    136 
    137 ; FUNC-LABEL: @constant_fold_fmax_f32_nan_nan
    138 ; SI-NOT: v_max_f32_e32
    139 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
    140 ; SI: buffer_store_dword [[REG]]
    141 
    142 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
    143 ; EG-NOT: MAX_DX10
    144 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
    145 ; EG: 2143289344(nan)
    146 define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind {
    147   %folded = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0 ; quiet NaN in both operands
    148   store float %folded, float addrspace(1)* %out, align 4 ; write the result through %out
    149   ret void
    150 }
    151 
    152 ; FUNC-LABEL: @constant_fold_fmax_f32_val_nan
    153 ; SI-NOT: v_max_f32_e32
    154 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
    155 ; SI: buffer_store_dword [[REG]]
    156 
    157 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
    158 ; EG-NOT: MAX_DX10
    159 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
    160 define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind {
    161   %folded = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000) #0 ; number first, quiet NaN second
    162   store float %folded, float addrspace(1)* %out, align 4 ; write the result through %out
    163   ret void
    164 }
    165 
    166 ; FUNC-LABEL: @constant_fold_fmax_f32_nan_val
    167 ; SI-NOT: v_max_f32_e32
    168 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
    169 ; SI: buffer_store_dword [[REG]]
    170 
    171 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
    172 ; EG-NOT: MAX_DX10
    173 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
    174 define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind {
    175   %folded = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0) #0 ; quiet NaN first, number second
    176   store float %folded, float addrspace(1)* %out, align 4 ; write the result through %out
    177   ret void
    178 }
    179 
    180 ; FUNC-LABEL: @constant_fold_fmax_f32_p0_p0
    181 ; SI-NOT: v_max_f32_e32
    182 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
    183 ; SI: buffer_store_dword [[REG]]
    184 
    185 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
    186 ; EG-NOT: MAX_DX10
    187 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
    188 define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind {
    189   %folded = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0 ; +0.0 in both operands
    190   store float %folded, float addrspace(1)* %out, align 4 ; write the result through %out
    191   ret void
    192 }
    193 
    194 ; FUNC-LABEL: @constant_fold_fmax_f32_p0_n0
    195 ; SI-NOT: v_max_f32_e32
    196 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
    197 ; SI: buffer_store_dword [[REG]]
    198 
    199 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
    200 ; EG-NOT: MAX_DX10
    201 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
    202 define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind {
    203   %folded = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0 ; +0.0 first, -0.0 second
    204   store float %folded, float addrspace(1)* %out, align 4 ; write the result through %out
    205   ret void
    206 }
    207 
    208 ; FUNC-LABEL: @constant_fold_fmax_f32_n0_p0
    209 ; SI-NOT: v_max_f32_e32
    210 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
    211 ; SI: buffer_store_dword [[REG]]
    212 
    213 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
    214 ; EG-NOT: MAX_DX10
    215 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
    216 define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind {
    217   %folded = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0 ; -0.0 first, +0.0 second
    218   store float %folded, float addrspace(1)* %out, align 4 ; write the result through %out
    219   ret void
    220 }
    221 
    222 ; FUNC-LABEL: @constant_fold_fmax_f32_n0_n0
    223 ; SI-NOT: v_max_f32_e32
    224 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
    225 ; SI: buffer_store_dword [[REG]]
    226 
    227 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
    228 ; EG-NOT: MAX_DX10
    229 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
    230 define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind {
    231   %folded = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0 ; -0.0 in both operands
    232   store float %folded, float addrspace(1)* %out, align 4 ; write the result through %out
    233   ret void
    234 }
    235 
    236 ; FUNC-LABEL: @fmax_var_immediate_f32
    237 ; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
    238 
    239 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
    240 ; %a is a runtime value, so the max cannot be constant-folded into a plain MOV.
    241 ; EG: MAX_DX10 {{.*}}[[OUT]]
    242 define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
    243   %val = call float @llvm.maxnum.f32(float %a, float 2.0) #0 ; variable operand first, constant 2.0 second
    244   store float %val, float addrspace(1)* %out, align 4 ; write the result through %out
    245   ret void
    246 }
    247 
    248 ; FUNC-LABEL: @fmax_immediate_var_f32
    249 ; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
    250 
    251 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
    252 ; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
    253 define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
    254   %max = call float @llvm.maxnum.f32(float 2.0, float %a) #0 ; constant 2.0 first, variable operand second
    255   store float %max, float addrspace(1)* %out, align 4 ; write the result through %out
    256   ret void
    257 }
    258 
    259 ; FUNC-LABEL: @fmax_var_literal_f32
    260 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
    261 ; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
    262 
    263 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
    264 ; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
    265 define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
    266   %max = call float @llvm.maxnum.f32(float %a, float 99.0) #0 ; variable operand first, constant 99.0 second
    267   store float %max, float addrspace(1)* %out, align 4 ; write the result through %out
    268   ret void
    269 }
    270 
    271 ; FUNC-LABEL: @fmax_literal_var_f32
    272 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
    273 ; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
    274 
    275 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
    276 ; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
    277 define void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
    278   %max = call float @llvm.maxnum.f32(float 99.0, float %a) #0 ; constant 99.0 first, variable operand second
    279   store float %max, float addrspace(1)* %out, align 4 ; write the result through %out
    280   ret void
    281 }
    282 
    283 attributes #0 = { nounwind readnone } ; attribute group referenced by the llvm.maxnum declarations and call sites in this file
    284