1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: Tom Stellard <thomas.stellard (at) amd.com> 24 * 25 * Based on radeon_elf_util.c. 26 */ 27 28 #include "ac_binary.h" 29 30 #include "util/u_math.h" 31 #include "util/u_memory.h" 32 33 #include <gelf.h> 34 #include <libelf.h> 35 #include <stdio.h> 36 37 #include <sid.h> 38 39 #define SPILLED_SGPRS 0x4 40 #define SPILLED_VGPRS 0x8 41 42 static void parse_symbol_table(Elf_Data *symbol_table_data, 43 const GElf_Shdr *symbol_table_header, 44 struct ac_shader_binary *binary) 45 { 46 GElf_Sym symbol; 47 unsigned i = 0; 48 unsigned symbol_count = 49 symbol_table_header->sh_size / symbol_table_header->sh_entsize; 50 51 /* We are over allocating this list, because symbol_count gives the 52 * total number of symbols, and we will only be filling the list 53 * with offsets of global symbols. The memory savings from 54 * allocating the correct size of this list will be small, and 55 * I don't think it is worth the cost of pre-computing the number 56 * of global symbols. 57 */ 58 binary->global_symbol_offsets = CALLOC(symbol_count, sizeof(uint64_t)); 59 60 while (gelf_getsym(symbol_table_data, i++, &symbol)) { 61 unsigned i; 62 if (GELF_ST_BIND(symbol.st_info) != STB_GLOBAL || 63 symbol.st_shndx == 0 /* Undefined symbol */) { 64 continue; 65 } 66 67 binary->global_symbol_offsets[binary->global_symbol_count] = 68 symbol.st_value; 69 70 /* Sort the list using bubble sort. This list will usually 71 * be small. */ 72 for (i = binary->global_symbol_count; i > 0; --i) { 73 uint64_t lhs = binary->global_symbol_offsets[i - 1]; 74 uint64_t rhs = binary->global_symbol_offsets[i]; 75 if (lhs < rhs) { 76 break; 77 } 78 binary->global_symbol_offsets[i] = lhs; 79 binary->global_symbol_offsets[i - 1] = rhs; 80 } 81 ++binary->global_symbol_count; 82 } 83 } 84 85 static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols, 86 unsigned symbol_sh_link, 87 struct ac_shader_binary *binary) 88 { 89 unsigned i; 90 91 if (!relocs || !symbols || !binary->reloc_count) { 92 return; 93 } 94 binary->relocs = CALLOC(binary->reloc_count, 95 sizeof(struct ac_shader_reloc)); 96 for (i = 0; i < binary->reloc_count; i++) { 97 GElf_Sym symbol; 98 GElf_Rel rel; 99 char *symbol_name; 100 struct ac_shader_reloc *reloc = &binary->relocs[i]; 101 102 gelf_getrel(relocs, i, &rel); 103 gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &symbol); 104 symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name); 105 106 reloc->offset = rel.r_offset; 107 strncpy(reloc->name, symbol_name, sizeof(reloc->name)-1); 108 reloc->name[sizeof(reloc->name)-1] = 0; 109 } 110 } 111 112 void ac_elf_read(const char *elf_data, unsigned elf_size, 113 struct ac_shader_binary *binary) 114 { 115 char *elf_buffer; 116 Elf *elf; 117 Elf_Scn *section = NULL; 118 Elf_Data *symbols = NULL, *relocs = NULL; 119 size_t section_str_index; 120 unsigned symbol_sh_link = 0; 121 122 /* One of the libelf implementations 123 * (http://www.mr511.de/software/english.htm) requires calling 124 * elf_version() before elf_memory(). 125 */ 126 elf_version(EV_CURRENT); 127 elf_buffer = MALLOC(elf_size); 128 memcpy(elf_buffer, elf_data, elf_size); 129 130 elf = elf_memory(elf_buffer, elf_size); 131 132 elf_getshdrstrndx(elf, §ion_str_index); 133 134 while ((section = elf_nextscn(elf, section))) { 135 const char *name; 136 Elf_Data *section_data = NULL; 137 GElf_Shdr section_header; 138 if (gelf_getshdr(section, §ion_header) != §ion_header) { 139 fprintf(stderr, "Failed to read ELF section header\n"); 140 return; 141 } 142 name = elf_strptr(elf, section_str_index, section_header.sh_name); 143 if (!strcmp(name, ".text")) { 144 section_data = elf_getdata(section, section_data); 145 binary->code_size = section_data->d_size; 146 binary->code = MALLOC(binary->code_size * sizeof(unsigned char)); 147 memcpy(binary->code, section_data->d_buf, binary->code_size); 148 } else if (!strcmp(name, ".AMDGPU.config")) { 149 section_data = elf_getdata(section, section_data); 150 binary->config_size = section_data->d_size; 151 binary->config = MALLOC(binary->config_size * sizeof(unsigned char)); 152 memcpy(binary->config, section_data->d_buf, binary->config_size); 153 } else if (!strcmp(name, ".AMDGPU.disasm")) { 154 /* Always read disassembly if it's available. */ 155 section_data = elf_getdata(section, section_data); 156 binary->disasm_string = strndup(section_data->d_buf, 157 section_data->d_size); 158 } else if (!strncmp(name, ".rodata", 7)) { 159 section_data = elf_getdata(section, section_data); 160 binary->rodata_size = section_data->d_size; 161 binary->rodata = MALLOC(binary->rodata_size * sizeof(unsigned char)); 162 memcpy(binary->rodata, section_data->d_buf, binary->rodata_size); 163 } else if (!strncmp(name, ".symtab", 7)) { 164 symbols = elf_getdata(section, section_data); 165 symbol_sh_link = section_header.sh_link; 166 parse_symbol_table(symbols, §ion_header, binary); 167 } else if (!strcmp(name, ".rel.text")) { 168 relocs = elf_getdata(section, section_data); 169 binary->reloc_count = section_header.sh_size / 170 section_header.sh_entsize; 171 } 172 } 173 174 parse_relocs(elf, relocs, symbols, symbol_sh_link, binary); 175 176 if (elf){ 177 elf_end(elf); 178 } 179 FREE(elf_buffer); 180 181 /* Cache the config size per symbol */ 182 if (binary->global_symbol_count) { 183 binary->config_size_per_symbol = 184 binary->config_size / binary->global_symbol_count; 185 } else { 186 binary->global_symbol_count = 1; 187 binary->config_size_per_symbol = binary->config_size; 188 } 189 } 190 191 static 192 const unsigned char *ac_shader_binary_config_start( 193 const struct ac_shader_binary *binary, 194 uint64_t symbol_offset) 195 { 196 unsigned i; 197 for (i = 0; i < binary->global_symbol_count; ++i) { 198 if (binary->global_symbol_offsets[i] == symbol_offset) { 199 unsigned offset = i * binary->config_size_per_symbol; 200 return binary->config + offset; 201 } 202 } 203 return binary->config; 204 } 205 206 207 static const char *scratch_rsrc_dword0_symbol = 208 "SCRATCH_RSRC_DWORD0"; 209 210 static const char *scratch_rsrc_dword1_symbol = 211 "SCRATCH_RSRC_DWORD1"; 212 213 void ac_shader_binary_read_config(struct ac_shader_binary *binary, 214 struct ac_shader_config *conf, 215 unsigned symbol_offset) 216 { 217 unsigned i; 218 const unsigned char *config = 219 ac_shader_binary_config_start(binary, symbol_offset); 220 bool really_needs_scratch = false; 221 222 /* LLVM adds SGPR spills to the scratch size. 223 * Find out if we really need the scratch buffer. 224 */ 225 for (i = 0; i < binary->reloc_count; i++) { 226 const struct ac_shader_reloc *reloc = &binary->relocs[i]; 227 228 if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) || 229 !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) { 230 really_needs_scratch = true; 231 break; 232 } 233 } 234 235 for (i = 0; i < binary->config_size_per_symbol; i+= 8) { 236 unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i)); 237 unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4)); 238 switch (reg) { 239 case R_00B028_SPI_SHADER_PGM_RSRC1_PS: 240 case R_00B128_SPI_SHADER_PGM_RSRC1_VS: 241 case R_00B228_SPI_SHADER_PGM_RSRC1_GS: 242 case R_00B848_COMPUTE_PGM_RSRC1: 243 conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8); 244 conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4); 245 conf->float_mode = G_00B028_FLOAT_MODE(value); 246 break; 247 case R_00B02C_SPI_SHADER_PGM_RSRC2_PS: 248 conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value)); 249 break; 250 case R_00B84C_COMPUTE_PGM_RSRC2: 251 conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value)); 252 break; 253 case R_0286CC_SPI_PS_INPUT_ENA: 254 conf->spi_ps_input_ena = value; 255 break; 256 case R_0286D0_SPI_PS_INPUT_ADDR: 257 conf->spi_ps_input_addr = value; 258 break; 259 case R_0286E8_SPI_TMPRING_SIZE: 260 case R_00B860_COMPUTE_TMPRING_SIZE: 261 /* WAVESIZE is in units of 256 dwords. */ 262 if (really_needs_scratch) 263 conf->scratch_bytes_per_wave = 264 G_00B860_WAVESIZE(value) * 256 * 4; 265 break; 266 case SPILLED_SGPRS: 267 conf->spilled_sgprs = value; 268 break; 269 case SPILLED_VGPRS: 270 conf->spilled_vgprs = value; 271 break; 272 default: 273 { 274 static bool printed; 275 276 if (!printed) { 277 fprintf(stderr, "Warning: LLVM emitted unknown " 278 "config register: 0x%x\n", reg); 279 printed = true; 280 } 281 } 282 break; 283 } 284 285 if (!conf->spi_ps_input_addr) 286 conf->spi_ps_input_addr = conf->spi_ps_input_ena; 287 } 288 } 289