1 ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s 2 ; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s 3 4 declare float @llvm.maxnum.f32(float, float) #0 5 declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0 6 declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #0 7 declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #0 8 declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #0 9 10 declare double @llvm.maxnum.f64(double, double) 11 12 ; FUNC-LABEL: @test_fmax_f32 13 ; SI: v_max_f32_e32 14 15 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 16 ; EG: MAX_DX10 {{.*}}[[OUT]] 17 define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwind { 18 %val = call float @llvm.maxnum.f32(float %a, float %b) #0 19 store float %val, float addrspace(1)* %out, align 4 20 ret void 21 } 22 23 ; FUNC-LABEL: @test_fmax_v2f32 24 ; SI: v_max_f32_e32 25 ; SI: v_max_f32_e32 26 27 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]] 28 ; EG: MAX_DX10 {{.*}}[[OUT]] 29 ; EG: MAX_DX10 {{.*}}[[OUT]] 30 define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind { 31 %val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b) #0 32 store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8 33 ret void 34 } 35 36 ; FUNC-LABEL: @test_fmax_v4f32 37 ; SI: v_max_f32_e32 38 ; SI: v_max_f32_e32 39 ; SI: v_max_f32_e32 40 ; SI: v_max_f32_e32 41 42 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]] 43 ; EG: MAX_DX10 {{.*}}[[OUT]] 44 ; EG: MAX_DX10 {{.*}}[[OUT]] 45 ; EG: MAX_DX10 {{.*}}[[OUT]] 46 ; EG: MAX_DX10 {{.*}}[[OUT]] 47 define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind { 48 %val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) #0 49 store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16 50 ret void 51 } 52 53 ; FUNC-LABEL: @test_fmax_v8f32 54 ; SI: v_max_f32_e32 55 ; SI: v_max_f32_e32 56 ; SI: v_max_f32_e32 57 ; SI: v_max_f32_e32 58 ; SI: v_max_f32_e32 59 ; SI: v_max_f32_e32 60 ; SI: v_max_f32_e32 61 ; SI: v_max_f32_e32 62 63 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]] 64 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]] 65 ; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X 66 ; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y 67 ; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z 68 ; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W 69 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X 70 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y 71 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z 72 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W 73 define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind { 74 %val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b) #0 75 store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32 76 ret void 77 } 78 79 ; FUNC-LABEL: @test_fmax_v16f32 80 ; SI: v_max_f32_e32 81 ; SI: v_max_f32_e32 82 ; SI: v_max_f32_e32 83 ; SI: v_max_f32_e32 84 ; SI: v_max_f32_e32 85 ; SI: v_max_f32_e32 86 ; SI: v_max_f32_e32 87 ; SI: v_max_f32_e32 88 ; SI: v_max_f32_e32 89 ; SI: v_max_f32_e32 90 ; SI: v_max_f32_e32 91 ; SI: v_max_f32_e32 92 ; SI: v_max_f32_e32 93 ; SI: v_max_f32_e32 94 ; SI: v_max_f32_e32 95 ; SI: v_max_f32_e32 96 97 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]] 98 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]] 99 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]] 100 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]] 101 ; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X 102 ; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y 103 ; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z 104 ; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W 105 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X 106 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y 107 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z 108 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W 109 ; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].X 110 ; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Y 111 ; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Z 112 ; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].W 113 ; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].X 114 ; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Y 115 ; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Z 116 ; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].W 117 define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind { 118 %val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b) #0 119 store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64 120 ret void 121 } 122 123 ; FUNC-LABEL: @constant_fold_fmax_f32 124 ; SI-NOT: v_max_f32_e32 125 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0 126 ; SI: buffer_store_dword [[REG]] 127 128 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 129 ; EG-NOT: MAX_DX10 130 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 131 define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind { 132 %val = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0 133 store float %val, float addrspace(1)* %out, align 4 134 ret void 135 } 136 137 ; FUNC-LABEL: @constant_fold_fmax_f32_nan_nan 138 ; SI-NOT: v_max_f32_e32 139 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000 140 ; SI: buffer_store_dword [[REG]] 141 142 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 143 ; EG-NOT: MAX_DX10 144 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 145 ; EG: 2143289344(nan) 146 define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind { 147 %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0 148 store float %val, float addrspace(1)* %out, align 4 149 ret void 150 } 151 152 ; FUNC-LABEL: @constant_fold_fmax_f32_val_nan 153 ; SI-NOT: v_max_f32_e32 154 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0 155 ; SI: buffer_store_dword [[REG]] 156 157 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 158 ; EG-NOT: MAX_DX10 159 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 160 define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind { 161 %val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000) #0 162 store float %val, float addrspace(1)* %out, align 4 163 ret void 164 } 165 166 ; FUNC-LABEL: @constant_fold_fmax_f32_nan_val 167 ; SI-NOT: v_max_f32_e32 168 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0 169 ; SI: buffer_store_dword [[REG]] 170 171 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 172 ; EG-NOT: MAX_DX10 173 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 174 define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind { 175 %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0) #0 176 store float %val, float addrspace(1)* %out, align 4 177 ret void 178 } 179 180 ; FUNC-LABEL: @constant_fold_fmax_f32_p0_p0 181 ; SI-NOT: v_max_f32_e32 182 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0 183 ; SI: buffer_store_dword [[REG]] 184 185 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 186 ; EG-NOT: MAX_DX10 187 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 188 define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind { 189 %val = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0 190 store float %val, float addrspace(1)* %out, align 4 191 ret void 192 } 193 194 ; FUNC-LABEL: @constant_fold_fmax_f32_p0_n0 195 ; SI-NOT: v_max_f32_e32 196 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0 197 ; SI: buffer_store_dword [[REG]] 198 199 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 200 ; EG-NOT: MAX_DX10 201 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 202 define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind { 203 %val = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0 204 store float %val, float addrspace(1)* %out, align 4 205 ret void 206 } 207 208 ; FUNC-LABEL: @constant_fold_fmax_f32_n0_p0 209 ; SI-NOT: v_max_f32_e32 210 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000 211 ; SI: buffer_store_dword [[REG]] 212 213 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 214 ; EG-NOT: MAX_DX10 215 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 216 define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind { 217 %val = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0 218 store float %val, float addrspace(1)* %out, align 4 219 ret void 220 } 221 222 ; FUNC-LABEL: @constant_fold_fmax_f32_n0_n0 223 ; SI-NOT: v_max_f32_e32 224 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000 225 ; SI: buffer_store_dword [[REG]] 226 227 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 228 ; EG-NOT: MAX_DX10 229 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 230 define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind { 231 %val = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0 232 store float %val, float addrspace(1)* %out, align 4 233 ret void 234 } 235 236 ; FUNC-LABEL: @fmax_var_immediate_f32 237 ; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}} 238 239 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 240 ; EG-NOT: MAX_DX10 241 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 242 define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind { 243 %val = call float @llvm.maxnum.f32(float %a, float 2.0) #0 244 store float %val, float addrspace(1)* %out, align 4 245 ret void 246 } 247 248 ; FUNC-LABEL: @fmax_immediate_var_f32 249 ; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}} 250 251 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 252 ; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}} 253 define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind { 254 %val = call float @llvm.maxnum.f32(float 2.0, float %a) #0 255 store float %val, float addrspace(1)* %out, align 4 256 ret void 257 } 258 259 ; FUNC-LABEL: @fmax_var_literal_f32 260 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000 261 ; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]] 262 263 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 264 ; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}} 265 define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind { 266 %val = call float @llvm.maxnum.f32(float %a, float 99.0) #0 267 store float %val, float addrspace(1)* %out, align 4 268 ret void 269 } 270 271 ; FUNC-LABEL: @fmax_literal_var_f32 272 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000 273 ; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]] 274 275 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 276 ; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}} 277 define void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) nounwind { 278 %val = call float @llvm.maxnum.f32(float 99.0, float %a) #0 279 store float %val, float addrspace(1)* %out, align 4 280 ret void 281 } 282 283 attributes #0 = { nounwind readnone } 284