; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s

; This test checks how integer and floating-point constants are materialised
; when they are stored directly or used as an operand of a floating-point add.
; It is run for two processors: verde (checked with the SI prefix) and tonga
; (checked with the VI prefix). Directives using the GCN prefix apply to both.

; Use a 64-bit value with lo bits that can be represented as an inline constant
; GCN-LABEL: {{^}}i64_imm_inline_lo:
; GCN: v_mov_b32_e32 v[[LO_VGPR:[0-9]+]], 5
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VGPR]]:
define amdgpu_kernel void @i64_imm_inline_lo(i64 addrspace(1) *%out) {
entry:
  store i64 1311768464867721221, i64 addrspace(1) *%out ; 0x1234567800000005
  ret void
}

; Use a 64-bit value with hi bits that can be represented as an inline constant
; GCN-LABEL: {{^}}i64_imm_inline_hi:
; GCN: v_mov_b32_e32 v[[HI_VGPR:[0-9]+]], 5
; GCN: buffer_store_dwordx2 v{{\[[0-9]+:}}[[HI_VGPR]]
define amdgpu_kernel void @i64_imm_inline_hi(i64 addrspace(1) *%out) {
entry:
  store i64 21780256376, i64 addrspace(1) *%out ; 0x0000000512345678
  ret void
}

; Only the sign bit is set in the following integer and float constants; the
; checks expect the sign-bit half to be produced by a bit reverse of 1.
; GCN-LABEL: {{^}}store_imm_neg_0.0_i64:
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
; GCN-DAG: v_bfrev_b32_e32 v[[HI_VREG:[0-9]+]], 1{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define amdgpu_kernel void @store_imm_neg_0.0_i64(i64 addrspace(1) *%out) {
  store i64 -9223372036854775808, i64 addrspace(1) *%out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_i32:
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_neg_0.0_i32(i32 addrspace(1)* %out) {
  store i32 -2147483648, i32 addrspace(1)* %out
  ret void
}

; Stores of f32 constants. Each check group expects one instruction that
; materialises the constant followed by the store of that register.
; GCN-LABEL: {{^}}store_inline_imm_0.0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_0.0_f32(float addrspace(1)* %out) {
  store float 0.0, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_imm_neg_0.0_f32:
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_imm_neg_0.0_f32(float addrspace(1)* %out) {
  store float -0.0, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_0.5_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0.5{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_0.5_f32(float addrspace(1)* %out) {
  store float 0.5, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_0.5_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -0.5{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) {
  store float -0.5, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_1.0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_1.0_f32(float addrspace(1)* %out) {
  store float 1.0, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_1.0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) {
  store float -1.0, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_2.0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_2.0_f32(float addrspace(1)* %out) {
  store float 2.0, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_2.0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -2.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) {
  store float -2.0, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_4.0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 4.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_4.0_f32(float addrspace(1)* %out) {
  store float 4.0, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_4.0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -4.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) {
  store float -4.0, float addrspace(1)* %out
  ret void
}


; The two runs differ here: the verde run expects the 32-bit literal
; 0x3e22f983, the tonga run expects the operand printed as 0.15915494.
; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_f32:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e22f983{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0.15915494{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_inv_2pi_f32(float addrspace(1)* %out) {
  store float 0x3FC45F3060000000, float addrspace(1)* %out
  ret void
}

; The negated value is expected as a literal on both runs.
; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbe22f983{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f32(float addrspace(1)* %out) {
  store float 0xBFC45F3060000000, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_literal_imm_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x45800000
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @store_literal_imm_f32(float addrspace(1)* %out) {
  store float 4096.0, float addrspace(1)* %out
  ret void
}

; f32 adds of a kernel argument and a constant. The checks expect the argument
; in a scalar register and the constant as the last operand of the add.
; GCN-LABEL: {{^}}add_inline_imm_0.0_f32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
  %y = fadd float %x, 0.0
  store float %y, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_0.5_f32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 0.5{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
  %y = fadd float %x, 0.5
  store float %y, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_f32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -0.5{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
  %y = fadd float %x, -0.5
  store float %y, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_1.0_f32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 1.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
  %y = fadd float %x, 1.0
  store float %y, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_f32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -1.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
  %y = fadd float %x, -1.0
  store float %y, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_2.0_f32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 2.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
  %y = fadd float %x, 2.0
  store float %y, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_f32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -2.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
  %y = fadd float %x, -2.0
  store float %y, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_4.0_f32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 4.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
  %y = fadd float %x, 4.0
  store float %y, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_f32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -4.0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
  %y = fadd float %x, -4.0
  store float %y, float addrspace(1)* %out
  ret void
}

; In the next two tests %x is loaded from memory rather than passed as a
; kernel argument; the checks expect the constant as the first source operand.
; GCN-LABEL: {{^}}commute_add_inline_imm_0.5_f32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_add_f32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %x = load float, float addrspace(1)* %in
  %y = fadd float %x, 0.5
  store float %y, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}commute_add_literal_f32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_add_f32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]]
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %x = load float, float addrspace(1)* %in
  %y = fadd float %x, 1024.0
  store float %y, float addrspace(1)* %out
  ret void
}

; The float constants below are written in LLVM's double-format hex notation
; and denote f32 values whose 32-bit pattern is a small integer; for example
; 0x36a0000000000000 is 2^-149, i.e. bit pattern 1. The checks expect them to
; be printed as the corresponding integer operand (1, 2, 16, 63, 64).
; GCN-LABEL: {{^}}add_inline_imm_1_f32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) {
  %y = fadd float %x, 0x36a0000000000000
  store float %y, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_2_f32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 2{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) {
  %y = fadd float %x, 0x36b0000000000000
  store float %y, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_16_f32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 16
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) {
  %y = fadd float %x, 0x36e0000000000000
  store float %y, float addrspace(1)* %out
  ret void
}

; The negative variants perform an integer add on the bitcast value, so the
; checks expect a scalar integer add followed by a copy into a vector register.
; GCN-LABEL: {{^}}add_inline_imm_neg_1_f32:
; GCN: s_add_i32 [[VAL:s[0-9]+]], s0, -1
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) {
  %xbc = bitcast float %x to i32
  %y = add i32 %xbc, -1
  %ybc = bitcast i32 %y to float
  store float %ybc, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_2_f32:
; GCN: s_add_i32 [[VAL:s[0-9]+]], s0, -2
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) {
  %xbc = bitcast float %x to i32
  %y = add i32 %xbc, -2
  %ybc = bitcast i32 %y to float
  store float %ybc, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_16_f32:
; GCN: s_add_i32 [[VAL:s[0-9]+]], s0, -16
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) {
  %xbc = bitcast float %x to i32
  %y = add i32 %xbc, -16
  %ybc = bitcast i32 %y to float
  store float %ybc, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_63_f32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 63
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) {
  %y = fadd float %x, 0x36ff800000000000
  store float %y, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_64_f32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 64
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
  %y = fadd float %x, 0x3700000000000000
  store float %y, float addrspace(1)* %out
  ret void
}


; f64 adds of a kernel argument and a constant.
; NOTE(review): the unnamed [8 x i32] argument presumably pads the argument
; list so that %x lands at the load offsets the checks expect (0x13 on verde,
; 0x4c on tonga) - confirm before changing any signature below.
; GCN-LABEL: {{^}}add_inline_imm_0.0_f64:
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}}
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @add_inline_imm_0.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, 0.0
  store double %y, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_0.5_f64:
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 0.5
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @add_inline_imm_0.5_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, 0.5
  store double %y, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_f64:
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -0.5
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, -0.5
  store double %y, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_1.0_f64:
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 1.0
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @add_inline_imm_1.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, 1.0
  store double %y, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_f64:
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -1.0
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, -1.0
  store double %y, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_2.0_f64:
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 2.0
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @add_inline_imm_2.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, 2.0
  store double %y, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_f64:
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -2.0
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, -2.0
  store double %y, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_4.0_f64:
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 4.0
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @add_inline_imm_4.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, 4.0
  store double %y, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_f64:
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -4.0
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, -4.0
  store double %y, double addrspace(1)* %out
  ret void
}

; The two runs differ here: the verde run expects both halves of the constant
; to be moved into vector registers first, the tonga run expects the constant
; directly as the add operand printed as 0.15915494.
; GCN-LABEL: {{^}}add_inline_imm_inv_2pi_f64:
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; SI-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882
; SI-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fc45f30
; SI: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}

; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; VI: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 0.15915494{{$}}
; VI: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @add_inline_imm_inv_2pi_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, 0x3fc45f306dc9c882
  store double %y, double addrspace(1)* %out
  ret void
}

; The negated constant is expected to be moved into registers on both runs.
; GCN-LABEL: {{^}}add_m_inv_2pi_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882
; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfc45f30
; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define amdgpu_kernel void @add_m_inv_2pi_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, 0xbfc45f306dc9c882
  store double %y, double addrspace(1)* %out
  ret void
}

; The double constants below have a 64-bit pattern equal to a small integer;
; the checks expect them to be printed as that integer operand.
; GCN-LABEL: {{^}}add_inline_imm_1_f64:
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 1{{$}}
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @add_inline_imm_1_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, 0x0000000000000001
  store double %y, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_2_f64:
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 2{{$}}
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @add_inline_imm_2_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, 0x0000000000000002
  store double %y, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_16_f64:
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 16
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @add_inline_imm_16_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, 0x0000000000000010
  store double %y, double addrspace(1)* %out
  ret void
}

; The next three constants have all exponent bits set and a non-zero mantissa,
; i.e. they are NaN patterns. The checks only look for two register moves and
; the store of the v[0:1] pair.
; GCN-LABEL: {{^}}add_inline_imm_neg_1_f64:
; GCN: v_mov_b32_e32 v0, -1
; GCN: v_mov_b32_e32 v1, v0
; GCN: buffer_store_dwordx2 v[0:1]
define amdgpu_kernel void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, 0xffffffffffffffff
  store double %y, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_2_f64:
; GCN: v_mov_b32_e32 v0, -2
; GCN: v_mov_b32_e32 v1, -1
; GCN: buffer_store_dwordx2 v[0:1]
define amdgpu_kernel void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, 0xfffffffffffffffe
  store double %y, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_16_f64:
; GCN: v_mov_b32_e32 v0, -16
; GCN: v_mov_b32_e32 v1, -1
; GCN: buffer_store_dwordx2 v[0:1]
define amdgpu_kernel void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, 0xfffffffffffffff0
  store double %y, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_63_f64:
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 63
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @add_inline_imm_63_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, 0x000000000000003F
  store double %y, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_64_f64:
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 64
; GCN: buffer_store_dwordx2 [[REG]]
define amdgpu_kernel void @add_inline_imm_64_f64(double addrspace(1)* %out, [8 x i32], double %x) {
  %y = fadd double %x, 0x0000000000000040
  store double %y, double addrspace(1)* %out
  ret void
}


; Stores of f64 constants. The checks expect the low and high 32-bit halves to
; be materialised in separate registers and stored as a pair.
; GCN-LABEL: {{^}}store_inline_imm_0.0_f64:
; GCN: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0
; GCN: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], v[[LO_VREG]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define amdgpu_kernel void @store_inline_imm_0.0_f64(double addrspace(1)* %out) {
  store double 0.0, double addrspace(1)* %out
  ret void
}


; GCN-LABEL: {{^}}store_literal_imm_neg_0.0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
; GCN-DAG: v_bfrev_b32_e32 v[[HI_VREG:[0-9]+]], 1{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define amdgpu_kernel void @store_literal_imm_neg_0.0_f64(double addrspace(1)* %out) {
  store double -0.0, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_0.5_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fe00000
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define amdgpu_kernel void @store_inline_imm_0.5_f64(double addrspace(1)* %out) {
  store double 0.5, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_0.5_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfe00000
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define amdgpu_kernel void @store_inline_imm_m_0.5_f64(double addrspace(1)* %out) {
  store double -0.5, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_1.0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3ff00000
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define amdgpu_kernel void @store_inline_imm_1.0_f64(double addrspace(1)* %out) {
  store double 1.0, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_1.0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbff00000
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define amdgpu_kernel void @store_inline_imm_m_1.0_f64(double addrspace(1)* %out) {
  store double -1.0, double addrspace(1)* %out
  ret void
}

; For +/-2.0 the high half of the double (0x40000000 / 0xc0000000) has the
; same bit pattern as the f32 value +/-2.0, which is how the checks expect the
; operand to be printed.
; GCN-LABEL: {{^}}store_inline_imm_2.0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 2.0
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define amdgpu_kernel void @store_inline_imm_2.0_f64(double addrspace(1)* %out) {
  store double 2.0, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_2.0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], -2.0
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define amdgpu_kernel void @store_inline_imm_m_2.0_f64(double addrspace(1)* %out) {
  store double -2.0, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_4.0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x40100000
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define amdgpu_kernel void @store_inline_imm_4.0_f64(double addrspace(1)* %out) {
  store double 4.0, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_4.0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xc0100000
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define amdgpu_kernel void @store_inline_imm_m_4.0_f64(double addrspace(1)* %out) {
  store double -4.0, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inv_2pi_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882
; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fc45f30
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define amdgpu_kernel void @store_inv_2pi_f64(double addrspace(1)* %out) {
  store double 0x3fc45f306dc9c882, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882
; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfc45f30
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f64(double addrspace(1)* %out) {
  store double 0xbfc45f306dc9c882, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_literal_imm_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x40b00000
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define amdgpu_kernel void @store_literal_imm_f64(double addrspace(1)* %out) {
  store double 4096.0, double addrspace(1)* %out
  ret void
}

; Multiplies one shader argument by a constant and by its negation; the checks
; expect each 32-bit literal to appear directly as the first multiply operand.
; GCN-LABEL: {{^}}literal_folding:
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f4353f8, v{{[0-9]+}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0xbf4353f8, v{{[0-9]+}}
define amdgpu_vs void @literal_folding(float %arg) {
main_body:
  %tmp = fmul float %arg, 0x3FE86A7F00000000
  %tmp1 = fmul float %arg, 0xBFE86A7F00000000
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp, float %tmp, float %tmp1, float %tmp1, i1 true, i1 false) #0
  ret void
}

declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0

attributes #0 = { nounwind }