Home | History | Annotate | Download | only in compiler
      1 /*
      2  * Copyright 2009 Nicolai Hähnle <nhaehnle (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
     22 
     23 #include "radeon_compiler.h"
     24 
     25 #include <stdarg.h>
     26 #include <stdio.h>
     27 #include <stdlib.h>
     28 
     29 #include "radeon_dataflow.h"
     30 #include "radeon_program.h"
     31 #include "radeon_program_pair.h"
     32 #include "radeon_regalloc.h"
     33 #include "radeon_compiler_util.h"
     34 
     35 
     36 void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs)
     37 {
     38 	memset(c, 0, sizeof(*c));
     39 
     40 	memory_pool_init(&c->Pool);
     41 	c->Program.Instructions.Prev = &c->Program.Instructions;
     42 	c->Program.Instructions.Next = &c->Program.Instructions;
     43 	c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
     44 	c->regalloc_state = rs;
     45 }
     46 
     47 void rc_destroy(struct radeon_compiler * c)
     48 {
     49 	rc_constants_destroy(&c->Program.Constants);
     50 	memory_pool_destroy(&c->Pool);
     51 	free(c->ErrorMsg);
     52 }
     53 
     54 void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
     55 {
     56 	va_list ap;
     57 
     58 	if (!(c->Debug & RC_DBG_LOG))
     59 		return;
     60 
     61 	va_start(ap, fmt);
     62 	vfprintf(stderr, fmt, ap);
     63 	va_end(ap);
     64 }
     65 
     66 void rc_error(struct radeon_compiler * c, const char * fmt, ...)
     67 {
     68 	va_list ap;
     69 
     70 	c->Error = 1;
     71 
     72 	if (!c->ErrorMsg) {
     73 		/* Only remember the first error */
     74 		char buf[1024];
     75 		int written;
     76 
     77 		va_start(ap, fmt);
     78 		written = vsnprintf(buf, sizeof(buf), fmt, ap);
     79 		va_end(ap);
     80 
     81 		if (written < sizeof(buf)) {
     82 			c->ErrorMsg = strdup(buf);
     83 		} else {
     84 			c->ErrorMsg = malloc(written + 1);
     85 
     86 			va_start(ap, fmt);
     87 			vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
     88 			va_end(ap);
     89 		}
     90 	}
     91 
     92 	if (c->Debug & RC_DBG_LOG) {
     93 		fprintf(stderr, "r300compiler error: ");
     94 
     95 		va_start(ap, fmt);
     96 		vfprintf(stderr, fmt, ap);
     97 		va_end(ap);
     98 	}
     99 }
    100 
    101 int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
    102 {
    103 	rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);
    104 	return 1;
    105 }
    106 
    107 /**
    108  * Recompute c->Program.InputsRead and c->Program.OutputsWritten
    109  * based on which inputs and outputs are actually referenced
    110  * in program instructions.
    111  */
    112 void rc_calculate_inputs_outputs(struct radeon_compiler * c)
    113 {
    114 	struct rc_instruction *inst;
    115 
    116 	c->Program.InputsRead = 0;
    117 	c->Program.OutputsWritten = 0;
    118 
    119 	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
    120 	{
    121 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
    122 		int i;
    123 
    124 		for (i = 0; i < opcode->NumSrcRegs; ++i) {
    125 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
    126 				c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index;
    127 		}
    128 
    129 		if (opcode->HasDstReg) {
    130 			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
    131 				c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index;
    132 		}
    133 	}
    134 }
    135 
    136 /**
    137  * Rewrite the program such that everything that source the given input
    138  * register will source new_input instead.
    139  */
    140 void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
    141 {
    142 	struct rc_instruction * inst;
    143 
    144 	c->Program.InputsRead &= ~(1 << input);
    145 
    146 	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
    147 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
    148 		unsigned i;
    149 
    150 		for(i = 0; i < opcode->NumSrcRegs; ++i) {
    151 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {
    152 				inst->U.I.SrcReg[i].File = new_input.File;
    153 				inst->U.I.SrcReg[i].Index = new_input.Index;
    154 				inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);
    155 				if (!inst->U.I.SrcReg[i].Abs) {
    156 					inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
    157 					inst->U.I.SrcReg[i].Abs = new_input.Abs;
    158 				}
    159 
    160 				c->Program.InputsRead |= 1 << new_input.Index;
    161 			}
    162 		}
    163 	}
    164 }
    165 
    166 
    167 /**
    168  * Rewrite the program such that everything that writes into the given
    169  * output register will instead write to new_output. The new_output
    170  * writemask is honoured.
    171  */
    172 void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
    173 {
    174 	struct rc_instruction * inst;
    175 
    176 	c->Program.OutputsWritten &= ~(1 << output);
    177 
    178 	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
    179 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
    180 
    181 		if (opcode->HasDstReg) {
    182 			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
    183 				inst->U.I.DstReg.Index = new_output;
    184 				inst->U.I.DstReg.WriteMask &= writemask;
    185 
    186 				c->Program.OutputsWritten |= 1 << new_output;
    187 			}
    188 		}
    189 	}
    190 }
    191 
    192 
    193 /**
    194  * Rewrite the program such that a given output is duplicated.
    195  */
    196 void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
    197 {
    198 	unsigned tempreg = rc_find_free_temporary(c);
    199 	struct rc_instruction * inst;
    200 
    201 	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
    202 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
    203 
    204 		if (opcode->HasDstReg) {
    205 			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
    206 				inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
    207 				inst->U.I.DstReg.Index = tempreg;
    208 			}
    209 		}
    210 	}
    211 
    212 	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
    213 	inst->U.I.Opcode = RC_OPCODE_MOV;
    214 	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
    215 	inst->U.I.DstReg.Index = output;
    216 
    217 	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
    218 	inst->U.I.SrcReg[0].Index = tempreg;
    219 	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
    220 
    221 	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
    222 	inst->U.I.Opcode = RC_OPCODE_MOV;
    223 	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
    224 	inst->U.I.DstReg.Index = dup_output;
    225 
    226 	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
    227 	inst->U.I.SrcReg[0].Index = tempreg;
    228 	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
    229 
    230 	c->Program.OutputsWritten |= 1 << dup_output;
    231 }
    232 
    233 
    234 /**
    235  * Introduce standard code fragment to deal with fragment.position.
    236  */
    237 void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
    238                                 int full_vtransform)
    239 {
    240 	unsigned tempregi = rc_find_free_temporary(c);
    241 	struct rc_instruction * inst_rcp;
    242 	struct rc_instruction * inst_mul;
    243 	struct rc_instruction * inst_mad;
    244 	struct rc_instruction * inst;
    245 
    246 	c->Program.InputsRead &= ~(1 << wpos);
    247 	c->Program.InputsRead |= 1 << new_input;
    248 
    249 	/* perspective divide */
    250 	inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
    251 	inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
    252 
    253 	inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
    254 	inst_rcp->U.I.DstReg.Index = tempregi;
    255 	inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
    256 
    257 	inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
    258 	inst_rcp->U.I.SrcReg[0].Index = new_input;
    259 	inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
    260 
    261 	inst_mul = rc_insert_new_instruction(c, inst_rcp);
    262 	inst_mul->U.I.Opcode = RC_OPCODE_MUL;
    263 
    264 	inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
    265 	inst_mul->U.I.DstReg.Index = tempregi;
    266 	inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
    267 
    268 	inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
    269 	inst_mul->U.I.SrcReg[0].Index = new_input;
    270 
    271 	inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
    272 	inst_mul->U.I.SrcReg[1].Index = tempregi;
    273 	inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
    274 
    275 	/* viewport transformation */
    276 	inst_mad = rc_insert_new_instruction(c, inst_mul);
    277 	inst_mad->U.I.Opcode = RC_OPCODE_MAD;
    278 
    279 	inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
    280 	inst_mad->U.I.DstReg.Index = tempregi;
    281 	inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
    282 
    283 	inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
    284 	inst_mad->U.I.SrcReg[0].Index = tempregi;
    285 	inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
    286 
    287 	inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
    288 	inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
    289 
    290 	inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
    291 	inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
    292 
    293 	if (full_vtransform) {
    294 		inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
    295 		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
    296 	} else {
    297 		inst_mad->U.I.SrcReg[1].Index =
    298 		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
    299 	}
    300 
    301 	for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
    302 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
    303 		unsigned i;
    304 
    305 		for(i = 0; i < opcode->NumSrcRegs; i++) {
    306 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
    307 			    inst->U.I.SrcReg[i].Index == wpos) {
    308 				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
    309 				inst->U.I.SrcReg[i].Index = tempregi;
    310 			}
    311 		}
    312 	}
    313 }
    314 
    315 
    316 /**
    317  * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
    318  * Gallium and OpenGL define it the other way around.
    319  *
    320  * So let's just negate FACE at the beginning of the shader and rewrite the rest
    321  * of the shader to read from the newly allocated temporary.
    322  */
    323 void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
    324 {
    325 	unsigned tempregi = rc_find_free_temporary(c);
    326 	struct rc_instruction *inst_add;
    327 	struct rc_instruction *inst;
    328 
    329 	/* perspective divide */
    330 	inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
    331 	inst_add->U.I.Opcode = RC_OPCODE_ADD;
    332 
    333 	inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
    334 	inst_add->U.I.DstReg.Index = tempregi;
    335 	inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
    336 
    337 	inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
    338 	inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
    339 
    340 	inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
    341 	inst_add->U.I.SrcReg[1].Index = face;
    342 	inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
    343 	inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
    344 
    345 	for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
    346 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
    347 		unsigned i;
    348 
    349 		for(i = 0; i < opcode->NumSrcRegs; i++) {
    350 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
    351 			    inst->U.I.SrcReg[i].Index == face) {
    352 				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
    353 				inst->U.I.SrcReg[i].Index = tempregi;
    354 			}
    355 		}
    356 	}
    357 }
    358 
    359 static void reg_count_callback(void * userdata, struct rc_instruction * inst,
    360 		rc_register_file file, unsigned int index, unsigned int mask)
    361 {
    362 	struct rc_program_stats *s = userdata;
    363 	if (file == RC_FILE_TEMPORARY)
    364 		(int)index > s->num_temp_regs ? s->num_temp_regs = index : 0;
    365 	if (file == RC_FILE_INLINE)
    366 		s->num_inline_literals++;
    367 }
    368 
    369 void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
    370 {
    371 	struct rc_instruction * tmp;
    372 	memset(s, 0, sizeof(*s));
    373 
    374 	for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
    375 							tmp = tmp->Next){
    376 		const struct rc_opcode_info * info;
    377 		rc_for_all_reads_mask(tmp, reg_count_callback, s);
    378 		if (tmp->Type == RC_INSTRUCTION_NORMAL) {
    379 			info = rc_get_opcode_info(tmp->U.I.Opcode);
    380 			if (info->Opcode == RC_OPCODE_BEGIN_TEX)
    381 				continue;
    382 			if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)
    383 				s->num_presub_ops++;
    384 		} else {
    385 			if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
    386 				s->num_presub_ops++;
    387 			if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
    388 				s->num_presub_ops++;
    389 			/* Assuming alpha will never be a flow control or
    390 			 * a tex instruction. */
    391 			if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
    392 				s->num_alpha_insts++;
    393 			if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
    394 				s->num_rgb_insts++;
    395 			if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 &&
    396 				tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {
    397 				s->num_omod_ops++;
    398 			}
    399 			if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 &&
    400 				tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
    401 				s->num_omod_ops++;
    402 			}
    403 			info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
    404 		}
    405 		if (info->IsFlowControl)
    406 			s->num_fc_insts++;
    407 		if (info->HasTexture)
    408 			s->num_tex_insts++;
    409 		s->num_insts++;
    410 	}
    411 	/* Increment here because the reg_count_callback store the max
    412 	 * temporary reg index in s->nun_temp_regs. */
    413 	s->num_temp_regs++;
    414 }
    415 
    416 static void print_stats(struct radeon_compiler * c)
    417 {
    418 	struct rc_program_stats s;
    419 
    420 	if (c->initial_num_insts <= 5)
    421 		return;
    422 
    423 	rc_get_stats(c, &s);
    424 
    425 	switch (c->type) {
    426 	case RC_VERTEX_PROGRAM:
    427 		fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
    428 			       "~%4u Instructions\n"
    429 			       "~%4u Flow Control Instructions\n"
    430 			       "~%4u Temporary Registers\n"
    431 			       "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
    432 			       s.num_insts, s.num_fc_insts, s.num_temp_regs);
    433 		break;
    434 
    435 	case RC_FRAGMENT_PROGRAM:
    436 		fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"
    437 			       "~%4u Instructions\n"
    438 			       "~%4u Vector Instructions (RGB)\n"
    439 			       "~%4u Scalar Instructions (Alpha)\n"
    440 			       "~%4u Flow Control Instructions\n"
    441 			       "~%4u Texture Instructions\n"
    442 			       "~%4u Presub Operations\n"
    443 			       "~%4u OMOD Operations\n"
    444 			       "~%4u Temporary Registers\n"
    445 			       "~%4u Inline Literals\n"
    446 			       "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
    447 			       s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
    448 			       s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
    449 			       s.num_omod_ops, s.num_temp_regs, s.num_inline_literals);
    450 		break;
    451 	default:
    452 		assert(0);
    453 	}
    454 }
    455 
/* Human-readable program type names used in debug log output; the table is
 * indexed by c->type.
 * NOTE(review): the entry order presumably mirrors the program type
 * enumeration (vertex first, then fragment) -- confirm against its
 * declaration. */
static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
	"Vertex Program",
	"Fragment Program"
};
    460 
    461 void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
    462 {
    463 	for (unsigned i = 0; list[i].name; i++) {
    464 		if (list[i].predicate) {
    465 			list[i].run(c, list[i].user);
    466 
    467 			if (c->Error)
    468 				return;
    469 
    470 			if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
    471 				fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
    472 				rc_print_program(&c->Program);
    473 			}
    474 		}
    475 	}
    476 }
    477 
    478 /* Executes a list of compiler passes given in the parameter 'list'. */
    479 void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
    480 {
    481 	struct rc_program_stats s;
    482 
    483 	rc_get_stats(c, &s);
    484 	c->initial_num_insts = s.num_insts;
    485 
    486 	if (c->Debug & RC_DBG_LOG) {
    487 		fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
    488 		rc_print_program(&c->Program);
    489 	}
    490 
    491 	rc_run_compiler_passes(c, list);
    492 
    493 	if (c->Debug & RC_DBG_STATS)
    494 		print_stats(c);
    495 }
    496 
    497 void rc_validate_final_shader(struct radeon_compiler *c, void *user)
    498 {
    499 	/* Check the number of constants. */
    500 	if (c->Program.Constants.Count > c->max_constants) {
    501 		rc_error(c, "Too many constants. Max: %i, Got: %i\n",
    502 			 c->max_constants, c->Program.Constants.Count);
    503 	}
    504 }
    505