Home | History | Annotate | Download | only in common
      1 /*
      2  * Copyright 2014 Advanced Micro Devices, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     21  * SOFTWARE.
     22  *
     23  * Authors: Tom Stellard <thomas.stellard (at) amd.com>
     24  *
     25  * Based on radeon_elf_util.c.
     26  */
     27 
     28 #include "ac_binary.h"
     29 
     30 #include "util/u_math.h"
     31 #include "util/u_memory.h"
     32 
     33 #include <gelf.h>
     34 #include <libelf.h>
     35 #include <stdio.h>
     36 
     37 #include <sid.h>
     38 
     39 #define SPILLED_SGPRS                                     0x4
     40 #define SPILLED_VGPRS                                     0x8
     41 
     42 static void parse_symbol_table(Elf_Data *symbol_table_data,
     43 				const GElf_Shdr *symbol_table_header,
     44 				struct ac_shader_binary *binary)
     45 {
     46 	GElf_Sym symbol;
     47 	unsigned i = 0;
     48 	unsigned symbol_count =
     49 		symbol_table_header->sh_size / symbol_table_header->sh_entsize;
     50 
     51 	/* We are over allocating this list, because symbol_count gives the
     52 	 * total number of symbols, and we will only be filling the list
     53 	 * with offsets of global symbols.  The memory savings from
     54 	 * allocating the correct size of this list will be small, and
     55 	 * I don't think it is worth the cost of pre-computing the number
     56 	 * of global symbols.
     57 	 */
     58 	binary->global_symbol_offsets = CALLOC(symbol_count, sizeof(uint64_t));
     59 
     60 	while (gelf_getsym(symbol_table_data, i++, &symbol)) {
     61 		unsigned i;
     62 		if (GELF_ST_BIND(symbol.st_info) != STB_GLOBAL ||
     63 		    symbol.st_shndx == 0 /* Undefined symbol */) {
     64 			continue;
     65 		}
     66 
     67 		binary->global_symbol_offsets[binary->global_symbol_count] =
     68 					symbol.st_value;
     69 
     70 		/* Sort the list using bubble sort.  This list will usually
     71 		 * be small. */
     72 		for (i = binary->global_symbol_count; i > 0; --i) {
     73 			uint64_t lhs = binary->global_symbol_offsets[i - 1];
     74 			uint64_t rhs = binary->global_symbol_offsets[i];
     75 			if (lhs < rhs) {
     76 				break;
     77 			}
     78 			binary->global_symbol_offsets[i] = lhs;
     79 			binary->global_symbol_offsets[i - 1] = rhs;
     80 		}
     81 		++binary->global_symbol_count;
     82 	}
     83 }
     84 
     85 static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
     86 			unsigned symbol_sh_link,
     87 			struct ac_shader_binary *binary)
     88 {
     89 	unsigned i;
     90 
     91 	if (!relocs || !symbols || !binary->reloc_count) {
     92 		return;
     93 	}
     94 	binary->relocs = CALLOC(binary->reloc_count,
     95 			sizeof(struct ac_shader_reloc));
     96 	for (i = 0; i < binary->reloc_count; i++) {
     97 		GElf_Sym symbol;
     98 		GElf_Rel rel;
     99 		char *symbol_name;
    100 		struct ac_shader_reloc *reloc = &binary->relocs[i];
    101 
    102 		gelf_getrel(relocs, i, &rel);
    103 		gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &symbol);
    104 		symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name);
    105 
    106 		reloc->offset = rel.r_offset;
    107 		strncpy(reloc->name, symbol_name, sizeof(reloc->name)-1);
    108 		reloc->name[sizeof(reloc->name)-1] = 0;
    109 	}
    110 }
    111 
    112 void ac_elf_read(const char *elf_data, unsigned elf_size,
    113 		 struct ac_shader_binary *binary)
    114 {
    115 	char *elf_buffer;
    116 	Elf *elf;
    117 	Elf_Scn *section = NULL;
    118 	Elf_Data *symbols = NULL, *relocs = NULL;
    119 	size_t section_str_index;
    120 	unsigned symbol_sh_link = 0;
    121 
    122 	/* One of the libelf implementations
    123 	 * (http://www.mr511.de/software/english.htm) requires calling
    124 	 * elf_version() before elf_memory().
    125 	 */
    126 	elf_version(EV_CURRENT);
    127 	elf_buffer = MALLOC(elf_size);
    128 	memcpy(elf_buffer, elf_data, elf_size);
    129 
    130 	elf = elf_memory(elf_buffer, elf_size);
    131 
    132 	elf_getshdrstrndx(elf, &section_str_index);
    133 
    134 	while ((section = elf_nextscn(elf, section))) {
    135 		const char *name;
    136 		Elf_Data *section_data = NULL;
    137 		GElf_Shdr section_header;
    138 		if (gelf_getshdr(section, &section_header) != &section_header) {
    139 			fprintf(stderr, "Failed to read ELF section header\n");
    140 			return;
    141 		}
    142 		name = elf_strptr(elf, section_str_index, section_header.sh_name);
    143 		if (!strcmp(name, ".text")) {
    144 			section_data = elf_getdata(section, section_data);
    145 			binary->code_size = section_data->d_size;
    146 			binary->code = MALLOC(binary->code_size * sizeof(unsigned char));
    147 			memcpy(binary->code, section_data->d_buf, binary->code_size);
    148 		} else if (!strcmp(name, ".AMDGPU.config")) {
    149 			section_data = elf_getdata(section, section_data);
    150 			binary->config_size = section_data->d_size;
    151 			binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
    152 			memcpy(binary->config, section_data->d_buf, binary->config_size);
    153 		} else if (!strcmp(name, ".AMDGPU.disasm")) {
    154 			/* Always read disassembly if it's available. */
    155 			section_data = elf_getdata(section, section_data);
    156 			binary->disasm_string = strndup(section_data->d_buf,
    157 							section_data->d_size);
    158 		} else if (!strncmp(name, ".rodata", 7)) {
    159 			section_data = elf_getdata(section, section_data);
    160 			binary->rodata_size = section_data->d_size;
    161 			binary->rodata = MALLOC(binary->rodata_size * sizeof(unsigned char));
    162 			memcpy(binary->rodata, section_data->d_buf, binary->rodata_size);
    163 		} else if (!strncmp(name, ".symtab", 7)) {
    164 			symbols = elf_getdata(section, section_data);
    165 			symbol_sh_link = section_header.sh_link;
    166 			parse_symbol_table(symbols, &section_header, binary);
    167 		} else if (!strcmp(name, ".rel.text")) {
    168 			relocs = elf_getdata(section, section_data);
    169 			binary->reloc_count = section_header.sh_size /
    170 					section_header.sh_entsize;
    171 		}
    172 	}
    173 
    174 	parse_relocs(elf, relocs, symbols, symbol_sh_link, binary);
    175 
    176 	if (elf){
    177 		elf_end(elf);
    178 	}
    179 	FREE(elf_buffer);
    180 
    181 	/* Cache the config size per symbol */
    182 	if (binary->global_symbol_count) {
    183 		binary->config_size_per_symbol =
    184 			binary->config_size / binary->global_symbol_count;
    185 	} else {
    186 		binary->global_symbol_count = 1;
    187 		binary->config_size_per_symbol = binary->config_size;
    188 	}
    189 }
    190 
    191 static
    192 const unsigned char *ac_shader_binary_config_start(
    193 	const struct ac_shader_binary *binary,
    194 	uint64_t symbol_offset)
    195 {
    196 	unsigned i;
    197 	for (i = 0; i < binary->global_symbol_count; ++i) {
    198 		if (binary->global_symbol_offsets[i] == symbol_offset) {
    199 			unsigned offset = i * binary->config_size_per_symbol;
    200 			return binary->config + offset;
    201 		}
    202 	}
    203 	return binary->config;
    204 }
    205 
    206 
    207 static const char *scratch_rsrc_dword0_symbol =
    208 	"SCRATCH_RSRC_DWORD0";
    209 
    210 static const char *scratch_rsrc_dword1_symbol =
    211 	"SCRATCH_RSRC_DWORD1";
    212 
    213 void ac_shader_binary_read_config(struct ac_shader_binary *binary,
    214 				  struct ac_shader_config *conf,
    215 				  unsigned symbol_offset)
    216 {
    217 	unsigned i;
    218 	const unsigned char *config =
    219 		ac_shader_binary_config_start(binary, symbol_offset);
    220 	bool really_needs_scratch = false;
    221 
    222 	/* LLVM adds SGPR spills to the scratch size.
    223 	 * Find out if we really need the scratch buffer.
    224 	 */
    225 	for (i = 0; i < binary->reloc_count; i++) {
    226 		const struct ac_shader_reloc *reloc = &binary->relocs[i];
    227 
    228 		if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
    229 		    !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
    230 			really_needs_scratch = true;
    231 			break;
    232 		}
    233 	}
    234 
    235 	for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
    236 		unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
    237 		unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
    238 		switch (reg) {
    239 		case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
    240 		case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
    241 		case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
    242 		case R_00B848_COMPUTE_PGM_RSRC1:
    243 			conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
    244 			conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
    245 			conf->float_mode =  G_00B028_FLOAT_MODE(value);
    246 			break;
    247 		case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
    248 			conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
    249 			break;
    250 		case R_00B84C_COMPUTE_PGM_RSRC2:
    251 			conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
    252 			break;
    253 		case R_0286CC_SPI_PS_INPUT_ENA:
    254 			conf->spi_ps_input_ena = value;
    255 			break;
    256 		case R_0286D0_SPI_PS_INPUT_ADDR:
    257 			conf->spi_ps_input_addr = value;
    258 			break;
    259 		case R_0286E8_SPI_TMPRING_SIZE:
    260 		case R_00B860_COMPUTE_TMPRING_SIZE:
    261 			/* WAVESIZE is in units of 256 dwords. */
    262 			if (really_needs_scratch)
    263 				conf->scratch_bytes_per_wave =
    264 					G_00B860_WAVESIZE(value) * 256 * 4;
    265 			break;
    266 		case SPILLED_SGPRS:
    267 			conf->spilled_sgprs = value;
    268 			break;
    269 		case SPILLED_VGPRS:
    270 			conf->spilled_vgprs = value;
    271 			break;
    272 		default:
    273 			{
    274 				static bool printed;
    275 
    276 				if (!printed) {
    277 					fprintf(stderr, "Warning: LLVM emitted unknown "
    278 						"config register: 0x%x\n", reg);
    279 					printed = true;
    280 				}
    281 			}
    282 			break;
    283 		}
    284 
    285 		if (!conf->spi_ps_input_addr)
    286 			conf->spi_ps_input_addr = conf->spi_ps_input_ena;
    287 	}
    288 }
    289