1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24 #include "ac_binary.h" 25 26 #include "util/u_math.h" 27 #include "util/u_memory.h" 28 29 #include <gelf.h> 30 #include <libelf.h> 31 #include <stdio.h> 32 33 #include <sid.h> 34 35 #define SPILLED_SGPRS 0x4 36 #define SPILLED_VGPRS 0x8 37 38 static void parse_symbol_table(Elf_Data *symbol_table_data, 39 const GElf_Shdr *symbol_table_header, 40 struct ac_shader_binary *binary) 41 { 42 GElf_Sym symbol; 43 unsigned i = 0; 44 unsigned symbol_count = 45 symbol_table_header->sh_size / symbol_table_header->sh_entsize; 46 47 /* We are over allocating this list, because symbol_count gives the 48 * total number of symbols, and we will only be filling the list 49 * with offsets of global symbols. The memory savings from 50 * allocating the correct size of this list will be small, and 51 * I don't think it is worth the cost of pre-computing the number 52 * of global symbols. 53 */ 54 binary->global_symbol_offsets = CALLOC(symbol_count, sizeof(uint64_t)); 55 56 while (gelf_getsym(symbol_table_data, i++, &symbol)) { 57 unsigned i; 58 if (GELF_ST_BIND(symbol.st_info) != STB_GLOBAL || 59 symbol.st_shndx == 0 /* Undefined symbol */) { 60 continue; 61 } 62 63 binary->global_symbol_offsets[binary->global_symbol_count] = 64 symbol.st_value; 65 66 /* Sort the list using bubble sort. This list will usually 67 * be small. */ 68 for (i = binary->global_symbol_count; i > 0; --i) { 69 uint64_t lhs = binary->global_symbol_offsets[i - 1]; 70 uint64_t rhs = binary->global_symbol_offsets[i]; 71 if (lhs < rhs) { 72 break; 73 } 74 binary->global_symbol_offsets[i] = lhs; 75 binary->global_symbol_offsets[i - 1] = rhs; 76 } 77 ++binary->global_symbol_count; 78 } 79 } 80 81 static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols, 82 unsigned symbol_sh_link, 83 struct ac_shader_binary *binary) 84 { 85 unsigned i; 86 87 if (!relocs || !symbols || !binary->reloc_count) { 88 return; 89 } 90 binary->relocs = CALLOC(binary->reloc_count, 91 sizeof(struct ac_shader_reloc)); 92 for (i = 0; i < binary->reloc_count; i++) { 93 GElf_Sym symbol; 94 GElf_Rel rel; 95 char *symbol_name; 96 struct ac_shader_reloc *reloc = &binary->relocs[i]; 97 98 gelf_getrel(relocs, i, &rel); 99 gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &symbol); 100 symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name); 101 102 reloc->offset = rel.r_offset; 103 strncpy(reloc->name, symbol_name, sizeof(reloc->name)-1); 104 reloc->name[sizeof(reloc->name)-1] = 0; 105 } 106 } 107 108 bool ac_elf_read(const char *elf_data, unsigned elf_size, 109 struct ac_shader_binary *binary) 110 { 111 char *elf_buffer; 112 Elf *elf; 113 Elf_Scn *section = NULL; 114 Elf_Data *symbols = NULL, *relocs = NULL; 115 size_t section_str_index; 116 unsigned symbol_sh_link = 0; 117 bool success = true; 118 119 /* One of the libelf implementations 120 * (http://www.mr511.de/software/english.htm) requires calling 121 * elf_version() before elf_memory(). 122 */ 123 elf_version(EV_CURRENT); 124 elf_buffer = MALLOC(elf_size); 125 memcpy(elf_buffer, elf_data, elf_size); 126 127 elf = elf_memory(elf_buffer, elf_size); 128 129 elf_getshdrstrndx(elf, §ion_str_index); 130 131 while ((section = elf_nextscn(elf, section))) { 132 const char *name; 133 Elf_Data *section_data = NULL; 134 GElf_Shdr section_header; 135 if (gelf_getshdr(section, §ion_header) != §ion_header) { 136 fprintf(stderr, "Failed to read ELF section header\n"); 137 success = false; 138 break; 139 } 140 name = elf_strptr(elf, section_str_index, section_header.sh_name); 141 if (!strcmp(name, ".text")) { 142 section_data = elf_getdata(section, section_data); 143 binary->code_size = section_data->d_size; 144 binary->code = MALLOC(binary->code_size * sizeof(unsigned char)); 145 memcpy(binary->code, section_data->d_buf, binary->code_size); 146 } else if (!strcmp(name, ".AMDGPU.config")) { 147 section_data = elf_getdata(section, section_data); 148 binary->config_size = section_data->d_size; 149 if (!binary->config_size) { 150 fprintf(stderr, ".AMDGPU.config is empty!\n"); 151 success = false; 152 break; 153 } 154 binary->config = MALLOC(binary->config_size * sizeof(unsigned char)); 155 memcpy(binary->config, section_data->d_buf, binary->config_size); 156 } else if (!strcmp(name, ".AMDGPU.disasm")) { 157 /* Always read disassembly if it's available. */ 158 section_data = elf_getdata(section, section_data); 159 binary->disasm_string = strndup(section_data->d_buf, 160 section_data->d_size); 161 } else if (!strncmp(name, ".rodata", 7)) { 162 section_data = elf_getdata(section, section_data); 163 binary->rodata_size = section_data->d_size; 164 binary->rodata = MALLOC(binary->rodata_size * sizeof(unsigned char)); 165 memcpy(binary->rodata, section_data->d_buf, binary->rodata_size); 166 } else if (!strncmp(name, ".symtab", 7)) { 167 symbols = elf_getdata(section, section_data); 168 symbol_sh_link = section_header.sh_link; 169 parse_symbol_table(symbols, §ion_header, binary); 170 } else if (!strcmp(name, ".rel.text")) { 171 relocs = elf_getdata(section, section_data); 172 binary->reloc_count = section_header.sh_size / 173 section_header.sh_entsize; 174 } 175 } 176 177 parse_relocs(elf, relocs, symbols, symbol_sh_link, binary); 178 179 if (elf){ 180 elf_end(elf); 181 } 182 FREE(elf_buffer); 183 184 /* Cache the config size per symbol */ 185 if (binary->global_symbol_count) { 186 binary->config_size_per_symbol = 187 binary->config_size / binary->global_symbol_count; 188 } else { 189 binary->global_symbol_count = 1; 190 binary->config_size_per_symbol = binary->config_size; 191 } 192 return success; 193 } 194 195 const unsigned char *ac_shader_binary_config_start( 196 const struct ac_shader_binary *binary, 197 uint64_t symbol_offset) 198 { 199 unsigned i; 200 for (i = 0; i < binary->global_symbol_count; ++i) { 201 if (binary->global_symbol_offsets[i] == symbol_offset) { 202 unsigned offset = i * binary->config_size_per_symbol; 203 return binary->config + offset; 204 } 205 } 206 return binary->config; 207 } 208 209 210 static const char *scratch_rsrc_dword0_symbol = 211 "SCRATCH_RSRC_DWORD0"; 212 213 static const char *scratch_rsrc_dword1_symbol = 214 "SCRATCH_RSRC_DWORD1"; 215 216 void ac_shader_binary_read_config(struct ac_shader_binary *binary, 217 struct ac_shader_config *conf, 218 unsigned symbol_offset, 219 bool supports_spill) 220 { 221 unsigned i; 222 const unsigned char *config = 223 ac_shader_binary_config_start(binary, symbol_offset); 224 bool really_needs_scratch = false; 225 uint32_t wavesize = 0; 226 /* LLVM adds SGPR spills to the scratch size. 227 * Find out if we really need the scratch buffer. 228 */ 229 if (supports_spill) { 230 really_needs_scratch = true; 231 } else { 232 for (i = 0; i < binary->reloc_count; i++) { 233 const struct ac_shader_reloc *reloc = &binary->relocs[i]; 234 235 if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) || 236 !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) { 237 really_needs_scratch = true; 238 break; 239 } 240 } 241 } 242 243 for (i = 0; i < binary->config_size_per_symbol; i+= 8) { 244 unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i)); 245 unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4)); 246 switch (reg) { 247 case R_00B028_SPI_SHADER_PGM_RSRC1_PS: 248 case R_00B128_SPI_SHADER_PGM_RSRC1_VS: 249 case R_00B228_SPI_SHADER_PGM_RSRC1_GS: 250 case R_00B848_COMPUTE_PGM_RSRC1: 251 case R_00B428_SPI_SHADER_PGM_RSRC1_HS: 252 conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8); 253 conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4); 254 conf->float_mode = G_00B028_FLOAT_MODE(value); 255 break; 256 case R_00B02C_SPI_SHADER_PGM_RSRC2_PS: 257 conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value)); 258 break; 259 case R_00B84C_COMPUTE_PGM_RSRC2: 260 conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value)); 261 break; 262 case R_0286CC_SPI_PS_INPUT_ENA: 263 conf->spi_ps_input_ena = value; 264 break; 265 case R_0286D0_SPI_PS_INPUT_ADDR: 266 conf->spi_ps_input_addr = value; 267 break; 268 case R_0286E8_SPI_TMPRING_SIZE: 269 case R_00B860_COMPUTE_TMPRING_SIZE: 270 /* WAVESIZE is in units of 256 dwords. */ 271 wavesize = value; 272 break; 273 case SPILLED_SGPRS: 274 conf->spilled_sgprs = value; 275 break; 276 case SPILLED_VGPRS: 277 conf->spilled_vgprs = value; 278 break; 279 default: 280 { 281 static bool printed; 282 283 if (!printed) { 284 fprintf(stderr, "Warning: LLVM emitted unknown " 285 "config register: 0x%x\n", reg); 286 printed = true; 287 } 288 } 289 break; 290 } 291 292 if (!conf->spi_ps_input_addr) 293 conf->spi_ps_input_addr = conf->spi_ps_input_ena; 294 } 295 296 if (really_needs_scratch) { 297 /* sgprs spills aren't spilling */ 298 conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(wavesize) * 256 * 4; 299 } 300 } 301 302 void ac_shader_binary_clean(struct ac_shader_binary *b) 303 { 304 if (!b) 305 return; 306 FREE(b->code); 307 FREE(b->config); 308 FREE(b->rodata); 309 FREE(b->global_symbol_offsets); 310 FREE(b->relocs); 311 FREE(b->disasm_string); 312 FREE(b->llvm_ir_string); 313 } 314