Home | History | Annotate | Download | only in compiler
      1 /*
      2  * Copyright (C) 2005 Ben Skeggs.
      3  *
      4  * Copyright 2008 Corbin Simpson <MostAwesomeDude (at) gmail.com>
      5  * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
      6  *
      7  * All Rights Reserved.
      8  *
      9  * Permission is hereby granted, free of charge, to any person obtaining
     10  * a copy of this software and associated documentation files (the
     11  * "Software"), to deal in the Software without restriction, including
     12  * without limitation the rights to use, copy, modify, merge, publish,
     13  * distribute, sublicense, and/or sell copies of the Software, and to
     14  * permit persons to whom the Software is furnished to do so, subject to
     15  * the following conditions:
     16  *
     17  * The above copyright notice and this permission notice (including the
     18  * next paragraph) shall be included in all copies or substantial
     19  * portions of the Software.
     20  *
     21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     22  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     23  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     24  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     25  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     26  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     27  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     28  *
     29  */
     30 
     31 /**
     32  * \file
     33  *
     34  * \author Ben Skeggs <darktama (at) iinet.net.au>
     35  *
     36  * \author Jerome Glisse <j.glisse (at) gmail.com>
     37  *
     38  * \author Corbin Simpson <MostAwesomeDude (at) gmail.com>
     39  *
     40  */
     41 
     42 #include "r500_fragprog.h"
     43 
     44 #include "r300_reg.h"
     45 
     46 #include "radeon_program_pair.h"
     47 
     48 #define PROG_CODE \
     49 	struct r500_fragment_program_code *code = &c->code->code.r500
     50 
     51 #define error(fmt, args...) do {			\
     52 		rc_error(&c->Base, "%s::%s(): " fmt "\n",	\
     53 			__FILE__, __FUNCTION__, ##args);	\
     54 	} while(0)
     55 
     56 
     57 struct branch_info {
     58 	int If;
     59 	int Else;
     60 	int Endif;
     61 };
     62 
     63 struct r500_loop_info {
     64 	int BgnLoop;
     65 
     66 	int BranchDepth;
     67 	int * Brks;
     68 	int BrkCount;
     69 	int BrkReserved;
     70 
     71 	int * Conts;
     72 	int ContCount;
     73 	int ContReserved;
     74 };
     75 
     76 struct emit_state {
     77 	struct radeon_compiler * C;
     78 	struct r500_fragment_program_code * Code;
     79 
     80 	struct branch_info * Branches;
     81 	unsigned int CurrentBranchDepth;
     82 	unsigned int BranchesReserved;
     83 
     84 	struct r500_loop_info * Loops;
     85 	unsigned int CurrentLoopDepth;
     86 	unsigned int LoopsReserved;
     87 
     88 	unsigned int MaxBranchDepth;
     89 
     90 };
     91 
     92 static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
     93 {
     94 	switch(opcode) {
     95 	case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
     96 	case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND;
     97 	case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
     98 	case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
     99 	case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
    100 	case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
    101 	case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
    102 	default:
    103 		error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
    104 		/* fall through */
    105 	case RC_OPCODE_NOP:
    106 		/* fall through */
    107 	case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
    108 	case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
    109 	case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
    110 	case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
    111 	}
    112 }
    113 
    114 static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
    115 {
    116 	switch(opcode) {
    117 	case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
    118 	case RC_OPCODE_CND: return R500_ALPHA_OP_CND;
    119 	case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
    120 	case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
    121 	case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
    122 	case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
    123 	case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
    124 	case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
    125 	case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
    126 	case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
    127 	default:
    128 		error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
    129 		/* fall through */
    130 	case RC_OPCODE_NOP:
    131 		/* fall through */
    132 	case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
    133 	case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
    134 	case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
    135 	case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
    136 	case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
    137 	case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
    138 	}
    139 }
    140 
    141 static unsigned int fix_hw_swizzle(unsigned int swz)
    142 {
    143     switch (swz) {
    144         case RC_SWIZZLE_ZERO:
    145         case RC_SWIZZLE_UNUSED:
    146             swz = 4;
    147             break;
    148         case RC_SWIZZLE_HALF:
    149             swz = 5;
    150             break;
    151         case RC_SWIZZLE_ONE:
    152             swz = 6;
    153             break;
    154     }
    155 
    156 	return swz;
    157 }
    158 
    159 static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
    160 {
    161 	unsigned int t = inst->RGB.Arg[arg].Source;
    162 	int comp;
    163 	t |= inst->RGB.Arg[arg].Negate << 11;
    164 	t |= inst->RGB.Arg[arg].Abs << 12;
    165 
    166 	for(comp = 0; comp < 3; ++comp)
    167 		t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
    168 
    169 	return t;
    170 }
    171 
    172 static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
    173 {
    174 	unsigned int t = inst->Alpha.Arg[i].Source;
    175 	t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
    176 	t |= inst->Alpha.Arg[i].Negate << 5;
    177 	t |= inst->Alpha.Arg[i].Abs << 6;
    178 	return t;
    179 }
    180 
    181 static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
    182 {
    183 	switch(func) {
    184 	case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
    185 	case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
    186 	case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
    187 	case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
    188 	default:
    189 		rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
    190 		return 0;
    191 	}
    192 }
    193 
    194 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
    195 {
    196 	if (index > code->max_temp_idx)
    197 		code->max_temp_idx = index;
    198 }
    199 
    200 static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
    201 {
    202 	/* From docs:
    203 	 *   Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
    204 	 * MSB = 1 << 7 */
    205 	if (!src.Used)
    206 		return 1 << 7;
    207 
    208 	if (src.File == RC_FILE_CONSTANT) {
    209 		return src.Index | R500_RGB_ADDR0_CONST;
    210 	} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
    211 		use_temporary(code, src.Index);
    212 		return src.Index;
    213 	} else if (src.File == RC_FILE_INLINE) {
    214 		return src.Index | (1 << 7);
    215 	}
    216 
    217 	return 0;
    218 }
    219 
    220 /**
    221  * NOP the specified instruction if it is not a texture lookup.
    222  */
    223 static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
    224 {
    225 	PROG_CODE;
    226 
    227 	if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
    228 		code->inst[ip].inst0 |= R500_INST_NOP;
    229 	}
    230 }
    231 
/**
 * Emit a paired ALU instruction.
 *
 * Takes the next free instruction slot and encodes the RGB and alpha halves
 * of `inst` into its six instruction words. Raises a compiler error and
 * returns without emitting when the instruction limit is reached. Also raises
 * an error when the instruction writes an output and the ALU result at the
 * same time; in that case the slot has already been taken.
 *
 * inst1, inst2 and inst3 are only ever OR-ed into here, so the slot is
 * expected to be zero on entry (r500BuildFragmentProgramHwCode zeroes the
 * whole code structure before emitting).
 */
static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
{
	int ip;
	PROG_CODE;

	if (code->inst_end >= c->Base.max_alu_insts-1) {
		error("emit_alu: Too many instructions");
		return;
	}

	/* inst_end is the index of the last emitted instruction. */
	ip = ++code->inst_end;

	/* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
	if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
		inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
		if (ip > 0) {
			alu_nop(c, ip - 1);
		}
	}

	/* These two assignments initialize inst5 and inst4; later writes OR into them. */
	code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
	code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);

	/* Any color or depth output write makes this an OUT instruction. */
	if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
		code->inst[ip].inst0 = R500_INST_TYPE_OUT;
		if (inst->WriteALUResult) {
			error("Cannot write output and ALU result at the same time");
			return;
		}
	} else {
		code->inst[ip].inst0 = R500_INST_TYPE_ALU;
	}
	code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);

	/* Register write masks (RGB at bit 11, alpha at bit 14) and output
	 * write masks (RGB at bit 15, alpha at bit 18). */
	code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
	code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
	code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
	if (inst->Nop) {
		code->inst[ip].inst0 |= R500_INST_NOP;
	}
	if (inst->Alpha.DepthWriteMask) {
		code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
		c->code->writes_depth = 1;
	}

	/* Destination registers; both count as used temporaries. */
	code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
	code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
	use_temporary(code, inst->Alpha.DestIndex);
	use_temporary(code, inst->RGB.DestIndex);

	if (inst->RGB.Saturate)
		code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
	if (inst->Alpha.Saturate)
		code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;

	/* Set the presubtract operation. */
	switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
		case RC_PRESUB_BIAS:
			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
			break;
		case RC_PRESUB_SUB:
			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
			break;
		case RC_PRESUB_ADD:
			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
			break;
		case RC_PRESUB_INV:
			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
			break;
		default:
			break;
	}
	switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
		case RC_PRESUB_BIAS:
			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
			break;
		case RC_PRESUB_SUB:
			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
			break;
		case RC_PRESUB_ADD:
			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
			break;
		case RC_PRESUB_INV:
			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
			break;
		default:
			break;
	}

	/* Set the output modifier */
	code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;
	code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;

	/* Source addresses: RGB sources go into inst1, alpha sources into inst2. */
	code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
	code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
	code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));

	code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
	code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
	code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));

	/* Argument selectors: A and B go into inst3 (RGB) and inst4 (alpha);
	 * the C selectors of both halves go into inst5. */
	code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
	code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
	code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;

	code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
	code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
	code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;

	code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
	code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);

	/* Optionally latch the ALU result, taken from red when WriteALUResult
	 * is RC_ALURESULT_X and from alpha otherwise. */
	if (inst->WriteALUResult) {
		code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;

		if (inst->WriteALUResult == RC_ALURESULT_X)
			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
		else
			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;

		code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
	}
}
    358 
    359 static unsigned int translate_strq_swizzle(unsigned int swizzle)
    360 {
    361 	unsigned int swiz = 0;
    362 	int i;
    363 	for (i = 0; i < 4; i++)
    364 		swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2;
    365 	return swiz;
    366 }
    367 
    368 /**
    369  * Emit a single TEX instruction
    370  */
    371 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
    372 {
    373 	int ip;
    374 	PROG_CODE;
    375 
    376 	if (code->inst_end >= c->Base.max_alu_insts-1) {
    377 		error("emit_tex: Too many instructions");
    378 		return 0;
    379 	}
    380 
    381 	ip = ++code->inst_end;
    382 
    383 	code->inst[ip].inst0 = R500_INST_TYPE_TEX
    384 		| (inst->DstReg.WriteMask << 11)
    385 		| (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
    386 	code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
    387 		| (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
    388 
    389 	if (inst->TexSrcTarget == RC_TEXTURE_RECT)
    390 		code->inst[ip].inst1 |= R500_TEX_UNSCALED;
    391 
    392 	switch (inst->Opcode) {
    393 	case RC_OPCODE_KIL:
    394 		code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
    395 		break;
    396 	case RC_OPCODE_TEX:
    397 		code->inst[ip].inst1 |= R500_TEX_INST_LD;
    398 		break;
    399 	case RC_OPCODE_TXB:
    400 		code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
    401 		break;
    402 	case RC_OPCODE_TXP:
    403 		code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
    404 		break;
    405 	case RC_OPCODE_TXD:
    406 		code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
    407 		break;
    408 	case RC_OPCODE_TXL:
    409 		code->inst[ip].inst1 |= R500_TEX_INST_LOD;
    410 		break;
    411 	default:
    412 		error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
    413 	}
    414 
    415 	use_temporary(code, inst->SrcReg[0].Index);
    416 	if (inst->Opcode != RC_OPCODE_KIL)
    417 		use_temporary(code, inst->DstReg.Index);
    418 
    419 	code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
    420 		| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
    421 		| R500_TEX_DST_ADDR(inst->DstReg.Index)
    422 		| (GET_SWZ(inst->TexSwizzle, 0) << 24)
    423 		| (GET_SWZ(inst->TexSwizzle, 1) << 26)
    424 		| (GET_SWZ(inst->TexSwizzle, 2) << 28)
    425 		| (GET_SWZ(inst->TexSwizzle, 3) << 30)
    426 		;
    427 
    428 	if (inst->Opcode == RC_OPCODE_TXD) {
    429 		use_temporary(code, inst->SrcReg[1].Index);
    430 		use_temporary(code, inst->SrcReg[2].Index);
    431 
    432 		/* DX and DY parameters are specified in a separate register. */
    433 		code->inst[ip].inst3 =
    434 			R500_DX_ADDR(inst->SrcReg[1].Index) |
    435 			(translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
    436 			R500_DY_ADDR(inst->SrcReg[2].Index) |
    437 			(translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
    438 	}
    439 
    440 	return 1;
    441 }
    442 
    443 static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
    444 {
    445 	unsigned int newip;
    446 
    447 	if (s->Code->inst_end >= s->C->max_alu_insts-1) {
    448 		rc_error(s->C, "emit_tex: Too many instructions");
    449 		return;
    450 	}
    451 
    452 	newip = ++s->Code->inst_end;
    453 
    454 	/* Currently all loops use the same integer constant to intialize
    455 	 * the loop variables. */
    456 	if(!s->Code->int_constants[0]) {
    457 		s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
    458 		s->Code->int_constant_count = 1;
    459 	}
    460 	s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
    461 
    462 	switch(inst->U.I.Opcode){
    463 	struct branch_info * branch;
    464 	struct r500_loop_info * loop;
    465 	case RC_OPCODE_BGNLOOP:
    466 		memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
    467 			s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
    468 
    469 		loop = &s->Loops[s->CurrentLoopDepth++];
    470 		memset(loop, 0, sizeof(struct r500_loop_info));
    471 		loop->BranchDepth = s->CurrentBranchDepth;
    472 		loop->BgnLoop = newip;
    473 
    474 		s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
    475 			| R500_FC_JUMP_FUNC(0x00)
    476 			| R500_FC_IGNORE_UNCOVERED
    477 			;
    478 		break;
    479 	case RC_OPCODE_BRK:
    480 		loop = &s->Loops[s->CurrentLoopDepth - 1];
    481 		memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
    482 					loop->BrkCount, loop->BrkReserved, 1);
    483 
    484 		loop->Brks[loop->BrkCount++] = newip;
    485 		s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
    486 			| R500_FC_JUMP_FUNC(0xff)
    487 			| R500_FC_B_OP1_DECR
    488 			| R500_FC_B_POP_CNT(
    489 				s->CurrentBranchDepth - loop->BranchDepth)
    490 			| R500_FC_IGNORE_UNCOVERED
    491 			;
    492 		break;
    493 
    494 	case RC_OPCODE_CONT:
    495 		loop = &s->Loops[s->CurrentLoopDepth - 1];
    496 		memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
    497 					loop->ContCount, loop->ContReserved, 1);
    498 		loop->Conts[loop->ContCount++] = newip;
    499 		s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
    500 			| R500_FC_JUMP_FUNC(0xff)
    501 			| R500_FC_B_OP1_DECR
    502 			| R500_FC_B_POP_CNT(
    503 				s->CurrentBranchDepth -	loop->BranchDepth)
    504 			| R500_FC_IGNORE_UNCOVERED
    505 			;
    506 		break;
    507 
    508 	case RC_OPCODE_ENDLOOP:
    509 	{
    510 		loop = &s->Loops[s->CurrentLoopDepth - 1];
    511 		/* Emit ENDLOOP */
    512 		s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
    513 			| R500_FC_JUMP_FUNC(0xff)
    514 			| R500_FC_JUMP_ANY
    515 			| R500_FC_IGNORE_UNCOVERED
    516 			;
    517 		/* The constant integer at index 0 is used by all loops. */
    518 		s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
    519 			| R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
    520 			;
    521 
    522 		/* Set jump address and int constant for BGNLOOP */
    523 		s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
    524 			| R500_FC_JUMP_ADDR(newip)
    525 			;
    526 
    527 		/* Set jump address for the BRK instructions. */
    528 		while(loop->BrkCount--) {
    529 			s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
    530 						R500_FC_JUMP_ADDR(newip + 1);
    531 		}
    532 
    533 		/* Set jump address for CONT instructions. */
    534 		while(loop->ContCount--) {
    535 			s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
    536 						R500_FC_JUMP_ADDR(newip);
    537 		}
    538 		s->CurrentLoopDepth--;
    539 		break;
    540 	}
    541 	case RC_OPCODE_IF:
    542 		if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
    543 			rc_error(s->C, "Branch depth exceeds hardware limit");
    544 			return;
    545 		}
    546 		memory_pool_array_reserve(&s->C->Pool, struct branch_info,
    547 				s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
    548 
    549 		branch = &s->Branches[s->CurrentBranchDepth++];
    550 		branch->If = newip;
    551 		branch->Else = -1;
    552 		branch->Endif = -1;
    553 
    554 		if (s->CurrentBranchDepth > s->MaxBranchDepth)
    555 			s->MaxBranchDepth = s->CurrentBranchDepth;
    556 
    557 		/* actual instruction is filled in at ENDIF time */
    558 		break;
    559 
    560 	case RC_OPCODE_ELSE:
    561 		if (!s->CurrentBranchDepth) {
    562 			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
    563 			return;
    564 		}
    565 
    566 		branch = &s->Branches[s->CurrentBranchDepth - 1];
    567 		branch->Else = newip;
    568 
    569 		/* actual instruction is filled in at ENDIF time */
    570 		break;
    571 
    572 	case RC_OPCODE_ENDIF:
    573 		if (!s->CurrentBranchDepth) {
    574 			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
    575 			return;
    576 		}
    577 
    578 		branch = &s->Branches[s->CurrentBranchDepth - 1];
    579 		branch->Endif = newip;
    580 
    581 		s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
    582 			| R500_FC_A_OP_NONE /* no address stack */
    583 			| R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
    584 			| R500_FC_B_OP0_DECR /* decrement branch counter if stay */
    585 			| R500_FC_B_OP1_NONE /* no branch counter if stay */
    586 			| R500_FC_B_POP_CNT(1)
    587 			;
    588 		s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
    589 		s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
    590 			| R500_FC_A_OP_NONE /* no address stack */
    591 			| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
    592 			| R500_FC_B_OP0_INCR /* increment branch counter if stay */
    593 			| R500_FC_IGNORE_UNCOVERED
    594 		;
    595 
    596 		if (branch->Else >= 0) {
    597 			/* increment branch counter also if jump */
    598 			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
    599 			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
    600 
    601 			s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
    602 				| R500_FC_A_OP_NONE /* no address stack */
    603 				| R500_FC_B_ELSE /* all active pixels want to jump */
    604 				| R500_FC_B_OP0_NONE /* no counter op if stay */
    605 				| R500_FC_B_OP1_DECR /* decrement branch counter if jump */
    606 				| R500_FC_B_POP_CNT(1)
    607 			;
    608 			s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
    609 		} else {
    610 			/* don't touch branch counter on jump */
    611 			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
    612 			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
    613 		}
    614 
    615 
    616 		s->CurrentBranchDepth--;
    617 		break;
    618 	default:
    619 		rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
    620 	}
    621 }
    622 
    623 void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
    624 {
    625 	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
    626 	struct emit_state s;
    627 	struct r500_fragment_program_code *code = &compiler->code->code.r500;
    628 
    629 	memset(&s, 0, sizeof(s));
    630 	s.C = &compiler->Base;
    631 	s.Code = code;
    632 
    633 	memset(code, 0, sizeof(*code));
    634 	code->max_temp_idx = 1;
    635 	code->inst_end = -1;
    636 
    637 	for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
    638 	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
    639 	    inst = inst->Next) {
    640 		if (inst->Type == RC_INSTRUCTION_NORMAL) {
    641 			const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
    642 
    643 			if (opcode->IsFlowControl) {
    644 				emit_flowcontrol(&s, inst);
    645 			} else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
    646 				continue;
    647 			} else {
    648 				emit_tex(compiler, &inst->U.I);
    649 			}
    650 		} else {
    651 			emit_paired(compiler, &inst->U.P);
    652 		}
    653 	}
    654 
    655 	if (code->max_temp_idx >= compiler->Base.max_temp_regs)
    656 		rc_error(&compiler->Base, "Too many hardware temporaries used");
    657 
    658 	if (compiler->Base.Error)
    659 		return;
    660 
    661 	if (code->inst_end == -1 ||
    662 	    (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
    663 		int ip;
    664 
    665 		/* This may happen when dead-code elimination is disabled or
    666 		 * when most of the fragment program logic is leading to a KIL */
    667 		if (code->inst_end >= compiler->Base.max_alu_insts-1) {
    668 			rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
    669 			return;
    670 		}
    671 
    672 		ip = ++code->inst_end;
    673 		code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
    674 	}
    675 
    676 	/* Make sure TEX_SEM_WAIT is set on the last instruction */
    677 	code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
    678 
    679 	/* Enable full flow control mode if we are using loops or have if
    680 	 * statements nested at least four deep. */
    681 	if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
    682 		if (code->max_temp_idx < 1)
    683 			code->max_temp_idx = 1;
    684 
    685 		code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
    686 	}
    687 }
    688