Home | History | Annotate | Download | only in common
      1 /*
      2  * Copyright 2014 Advanced Micro Devices, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     21  * SOFTWARE.
     22  */
     23 
     24 #include "ac_binary.h"
     25 
     26 #include "util/u_math.h"
     27 #include "util/u_memory.h"
     28 
     29 #include <gelf.h>
     30 #include <libelf.h>
     31 #include <stdio.h>
     32 
     33 #include <sid.h>
     34 
     35 #define SPILLED_SGPRS                                     0x4
     36 #define SPILLED_VGPRS                                     0x8
     37 
     38 static void parse_symbol_table(Elf_Data *symbol_table_data,
     39 				const GElf_Shdr *symbol_table_header,
     40 				struct ac_shader_binary *binary)
     41 {
     42 	GElf_Sym symbol;
     43 	unsigned i = 0;
     44 	unsigned symbol_count =
     45 		symbol_table_header->sh_size / symbol_table_header->sh_entsize;
     46 
     47 	/* We are over allocating this list, because symbol_count gives the
     48 	 * total number of symbols, and we will only be filling the list
     49 	 * with offsets of global symbols.  The memory savings from
     50 	 * allocating the correct size of this list will be small, and
     51 	 * I don't think it is worth the cost of pre-computing the number
     52 	 * of global symbols.
     53 	 */
     54 	binary->global_symbol_offsets = CALLOC(symbol_count, sizeof(uint64_t));
     55 
     56 	while (gelf_getsym(symbol_table_data, i++, &symbol)) {
     57 		unsigned i;
     58 		if (GELF_ST_BIND(symbol.st_info) != STB_GLOBAL ||
     59 		    symbol.st_shndx == 0 /* Undefined symbol */) {
     60 			continue;
     61 		}
     62 
     63 		binary->global_symbol_offsets[binary->global_symbol_count] =
     64 					symbol.st_value;
     65 
     66 		/* Sort the list using bubble sort.  This list will usually
     67 		 * be small. */
     68 		for (i = binary->global_symbol_count; i > 0; --i) {
     69 			uint64_t lhs = binary->global_symbol_offsets[i - 1];
     70 			uint64_t rhs = binary->global_symbol_offsets[i];
     71 			if (lhs < rhs) {
     72 				break;
     73 			}
     74 			binary->global_symbol_offsets[i] = lhs;
     75 			binary->global_symbol_offsets[i - 1] = rhs;
     76 		}
     77 		++binary->global_symbol_count;
     78 	}
     79 }
     80 
     81 static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
     82 			unsigned symbol_sh_link,
     83 			struct ac_shader_binary *binary)
     84 {
     85 	unsigned i;
     86 
     87 	if (!relocs || !symbols || !binary->reloc_count) {
     88 		return;
     89 	}
     90 	binary->relocs = CALLOC(binary->reloc_count,
     91 			sizeof(struct ac_shader_reloc));
     92 	for (i = 0; i < binary->reloc_count; i++) {
     93 		GElf_Sym symbol;
     94 		GElf_Rel rel;
     95 		char *symbol_name;
     96 		struct ac_shader_reloc *reloc = &binary->relocs[i];
     97 
     98 		gelf_getrel(relocs, i, &rel);
     99 		gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &symbol);
    100 		symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name);
    101 
    102 		reloc->offset = rel.r_offset;
    103 		strncpy(reloc->name, symbol_name, sizeof(reloc->name)-1);
    104 		reloc->name[sizeof(reloc->name)-1] = 0;
    105 	}
    106 }
    107 
    108 bool ac_elf_read(const char *elf_data, unsigned elf_size,
    109 		 struct ac_shader_binary *binary)
    110 {
    111 	char *elf_buffer;
    112 	Elf *elf;
    113 	Elf_Scn *section = NULL;
    114 	Elf_Data *symbols = NULL, *relocs = NULL;
    115 	size_t section_str_index;
    116 	unsigned symbol_sh_link = 0;
    117 	bool success = true;
    118 
    119 	/* One of the libelf implementations
    120 	 * (http://www.mr511.de/software/english.htm) requires calling
    121 	 * elf_version() before elf_memory().
    122 	 */
    123 	elf_version(EV_CURRENT);
    124 	elf_buffer = MALLOC(elf_size);
    125 	memcpy(elf_buffer, elf_data, elf_size);
    126 
    127 	elf = elf_memory(elf_buffer, elf_size);
    128 
    129 	elf_getshdrstrndx(elf, &section_str_index);
    130 
    131 	while ((section = elf_nextscn(elf, section))) {
    132 		const char *name;
    133 		Elf_Data *section_data = NULL;
    134 		GElf_Shdr section_header;
    135 		if (gelf_getshdr(section, &section_header) != &section_header) {
    136 			fprintf(stderr, "Failed to read ELF section header\n");
    137 			success = false;
    138 			break;
    139 		}
    140 		name = elf_strptr(elf, section_str_index, section_header.sh_name);
    141 		if (!strcmp(name, ".text")) {
    142 			section_data = elf_getdata(section, section_data);
    143 			binary->code_size = section_data->d_size;
    144 			binary->code = MALLOC(binary->code_size * sizeof(unsigned char));
    145 			memcpy(binary->code, section_data->d_buf, binary->code_size);
    146 		} else if (!strcmp(name, ".AMDGPU.config")) {
    147 			section_data = elf_getdata(section, section_data);
    148 			binary->config_size = section_data->d_size;
    149 			if (!binary->config_size) {
    150 				fprintf(stderr, ".AMDGPU.config is empty!\n");
    151 				success = false;
    152 				break;
    153 			}
    154 			binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
    155 			memcpy(binary->config, section_data->d_buf, binary->config_size);
    156 		} else if (!strcmp(name, ".AMDGPU.disasm")) {
    157 			/* Always read disassembly if it's available. */
    158 			section_data = elf_getdata(section, section_data);
    159 			binary->disasm_string = strndup(section_data->d_buf,
    160 							section_data->d_size);
    161 		} else if (!strncmp(name, ".rodata", 7)) {
    162 			section_data = elf_getdata(section, section_data);
    163 			binary->rodata_size = section_data->d_size;
    164 			binary->rodata = MALLOC(binary->rodata_size * sizeof(unsigned char));
    165 			memcpy(binary->rodata, section_data->d_buf, binary->rodata_size);
    166 		} else if (!strncmp(name, ".symtab", 7)) {
    167 			symbols = elf_getdata(section, section_data);
    168 			symbol_sh_link = section_header.sh_link;
    169 			parse_symbol_table(symbols, &section_header, binary);
    170 		} else if (!strcmp(name, ".rel.text")) {
    171 			relocs = elf_getdata(section, section_data);
    172 			binary->reloc_count = section_header.sh_size /
    173 					section_header.sh_entsize;
    174 		}
    175 	}
    176 
    177 	parse_relocs(elf, relocs, symbols, symbol_sh_link, binary);
    178 
    179 	if (elf){
    180 		elf_end(elf);
    181 	}
    182 	FREE(elf_buffer);
    183 
    184 	/* Cache the config size per symbol */
    185 	if (binary->global_symbol_count) {
    186 		binary->config_size_per_symbol =
    187 			binary->config_size / binary->global_symbol_count;
    188 	} else {
    189 		binary->global_symbol_count = 1;
    190 		binary->config_size_per_symbol = binary->config_size;
    191 	}
    192 	return success;
    193 }
    194 
    195 const unsigned char *ac_shader_binary_config_start(
    196 	const struct ac_shader_binary *binary,
    197 	uint64_t symbol_offset)
    198 {
    199 	unsigned i;
    200 	for (i = 0; i < binary->global_symbol_count; ++i) {
    201 		if (binary->global_symbol_offsets[i] == symbol_offset) {
    202 			unsigned offset = i * binary->config_size_per_symbol;
    203 			return binary->config + offset;
    204 		}
    205 	}
    206 	return binary->config;
    207 }
    208 
    209 
    210 static const char *scratch_rsrc_dword0_symbol =
    211 	"SCRATCH_RSRC_DWORD0";
    212 
    213 static const char *scratch_rsrc_dword1_symbol =
    214 	"SCRATCH_RSRC_DWORD1";
    215 
    216 void ac_shader_binary_read_config(struct ac_shader_binary *binary,
    217 				  struct ac_shader_config *conf,
    218 				  unsigned symbol_offset,
    219 				  bool supports_spill)
    220 {
    221 	unsigned i;
    222 	const unsigned char *config =
    223 		ac_shader_binary_config_start(binary, symbol_offset);
    224 	bool really_needs_scratch = false;
    225 	uint32_t wavesize = 0;
    226 	/* LLVM adds SGPR spills to the scratch size.
    227 	 * Find out if we really need the scratch buffer.
    228 	 */
    229 	if (supports_spill) {
    230 		really_needs_scratch = true;
    231 	} else {
    232 		for (i = 0; i < binary->reloc_count; i++) {
    233 			const struct ac_shader_reloc *reloc = &binary->relocs[i];
    234 
    235 			if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
    236 			    !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
    237 				really_needs_scratch = true;
    238 				break;
    239 			}
    240 		}
    241 	}
    242 
    243 	for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
    244 		unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
    245 		unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
    246 		switch (reg) {
    247 		case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
    248 		case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
    249 		case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
    250 		case R_00B848_COMPUTE_PGM_RSRC1:
    251 		case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
    252 			conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
    253 			conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
    254 			conf->float_mode =  G_00B028_FLOAT_MODE(value);
    255 			break;
    256 		case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
    257 			conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
    258 			break;
    259 		case R_00B84C_COMPUTE_PGM_RSRC2:
    260 			conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
    261 			break;
    262 		case R_0286CC_SPI_PS_INPUT_ENA:
    263 			conf->spi_ps_input_ena = value;
    264 			break;
    265 		case R_0286D0_SPI_PS_INPUT_ADDR:
    266 			conf->spi_ps_input_addr = value;
    267 			break;
    268 		case R_0286E8_SPI_TMPRING_SIZE:
    269 		case R_00B860_COMPUTE_TMPRING_SIZE:
    270 			/* WAVESIZE is in units of 256 dwords. */
    271 			wavesize = value;
    272 			break;
    273 		case SPILLED_SGPRS:
    274 			conf->spilled_sgprs = value;
    275 			break;
    276 		case SPILLED_VGPRS:
    277 			conf->spilled_vgprs = value;
    278 			break;
    279 		default:
    280 			{
    281 				static bool printed;
    282 
    283 				if (!printed) {
    284 					fprintf(stderr, "Warning: LLVM emitted unknown "
    285 						"config register: 0x%x\n", reg);
    286 					printed = true;
    287 				}
    288 			}
    289 			break;
    290 		}
    291 
    292 		if (!conf->spi_ps_input_addr)
    293 			conf->spi_ps_input_addr = conf->spi_ps_input_ena;
    294 	}
    295 
    296 	if (really_needs_scratch) {
    297 		/* sgprs spills aren't spilling */
    298 	        conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(wavesize) * 256 * 4;
    299 	}
    300 }
    301 
    302 void ac_shader_binary_clean(struct ac_shader_binary *b)
    303 {
    304 	if (!b)
    305 		return;
    306 	FREE(b->code);
    307 	FREE(b->config);
    308 	FREE(b->rodata);
    309 	FREE(b->global_symbol_offsets);
    310 	FREE(b->relocs);
    311 	FREE(b->disasm_string);
    312 	FREE(b->llvm_ir_string);
    313 }
    314