Home | History | Annotate | Download | only in a2xx
      1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
      2 
      3 /*
      4  * Copyright (C) 2012 Rob Clark <robclark (at) freedesktop.org>
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the next
     14  * paragraph) shall be included in all copies or substantial portions of the
     15  * Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     23  * SOFTWARE.
     24  *
     25  * Authors:
     26  *    Rob Clark <robclark (at) freedesktop.org>
     27  */
     28 
     29 #include "pipe/p_state.h"
     30 #include "util/u_string.h"
     31 #include "util/u_memory.h"
     32 #include "util/u_inlines.h"
     33 #include "tgsi/tgsi_parse.h"
     34 #include "tgsi/tgsi_ureg.h"
     35 #include "tgsi/tgsi_info.h"
     36 #include "tgsi/tgsi_strings.h"
     37 #include "tgsi/tgsi_dump.h"
     38 
     39 #include "fd2_compiler.h"
     40 #include "fd2_program.h"
     41 #include "fd2_util.h"
     42 
     43 #include "instr-a2xx.h"
     44 #include "ir-a2xx.h"
     45 
/*
 * Per-shader compile state.  It is filled in by compile_init() and then
 * passed to every register/instruction helper in this file.
 */
struct fd2_compile_context {
	struct fd_program_stateobj *prog;
	struct fd2_shader_stateobj *so;

	struct tgsi_parse_context parser;
	/* shader processor type, read from the TGSI header in compile_init() */
	unsigned type;

	/* predicate stack: */
	int pred_depth;
	enum ir2_pred pred_stack[8];

	/* Internal-Temporary and Predicate register assignment:
	 *
	 * Some TGSI instructions which translate into multiple actual
	 * instructions need one or more temporary registers, which are not
	 * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY).
	 * And some instructions (texture fetch) cannot write directly to
	 * output registers.  We could be more clever and re-use dst or a
	 * src register in some cases.  But for now don't try to be clever.
	 * Eventually we should implement an optimization pass that re-
	 * juggles the register usage and gets rid of unneeded temporaries.
	 *
	 * The predicate register must be valid across multiple TGSI
	 * instructions, but internal temporaries do not.  For this reason,
	 * once the predicate register is requested, until it is no longer
	 * needed, it gets the first register slot after the TGSI
	 * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the
	 * internal temporaries get the register slots above this.
	 */

	/* -1 while no predicate register is assigned */
	int pred_reg;
	int num_internal_temps;

	/* highest declared register index + 1, per TGSI register file */
	uint8_t num_regs[TGSI_FILE_COUNT];

	/* maps input register idx to prog->export_linkage idx: */
	uint8_t input_export_idx[64];

	/* maps output register idx to prog->export_linkage idx: */
	uint8_t output_export_idx[64];

	/* idx/slot for last compiler generated immediate */
	unsigned immediate_idx;

	// TODO we can skip emit exports in the VS that the FS doesn't need..
	// and get rid perhaps of num_param..
	unsigned num_position, num_param;
	/* output register idx of the VS position / point-size outputs;
	 * compile_init() leaves these at ~0 when no such output is declared
	 */
	unsigned position, psize;

	/* bitmask indexed by register number: a bit is set when a fetch
	 * writes that register and cleared by add_src_reg() once a consuming
	 * instruction has been flagged with sync
	 */
	uint64_t need_sync;

	/* current exec CF instruction */
	struct ir2_cf *cf;
};
    100 
    101 static int
    102 semantic_idx(struct tgsi_declaration_semantic *semantic)
    103 {
    104 	int idx = semantic->Name;
    105 	if (idx == TGSI_SEMANTIC_GENERIC)
    106 		idx = TGSI_SEMANTIC_COUNT + semantic->Index;
    107 	return idx;
    108 }
    109 
    110 /* assign/get the input/export register # for given semantic idx as
    111  * returned by semantic_idx():
    112  */
    113 static int
    114 export_linkage(struct fd2_compile_context *ctx, int idx)
    115 {
    116 	struct fd_program_stateobj *prog = ctx->prog;
    117 
    118 	/* if first time we've seen this export, assign the next available slot: */
    119 	if (prog->export_linkage[idx] == 0xff)
    120 		prog->export_linkage[idx] = prog->num_exports++;
    121 
    122 	return prog->export_linkage[idx];
    123 }
    124 
    125 static unsigned
    126 compile_init(struct fd2_compile_context *ctx, struct fd_program_stateobj *prog,
    127 		struct fd2_shader_stateobj *so)
    128 {
    129 	unsigned ret;
    130 
    131 	ctx->prog = prog;
    132 	ctx->so = so;
    133 	ctx->cf = NULL;
    134 	ctx->pred_depth = 0;
    135 
    136 	ret = tgsi_parse_init(&ctx->parser, so->tokens);
    137 	if (ret != TGSI_PARSE_OK)
    138 		return ret;
    139 
    140 	ctx->type = ctx->parser.FullHeader.Processor.Processor;
    141 	ctx->position = ~0;
    142 	ctx->psize = ~0;
    143 	ctx->num_position = 0;
    144 	ctx->num_param = 0;
    145 	ctx->need_sync = 0;
    146 	ctx->immediate_idx = 0;
    147 	ctx->pred_reg = -1;
    148 	ctx->num_internal_temps = 0;
    149 
    150 	memset(ctx->num_regs, 0, sizeof(ctx->num_regs));
    151 	memset(ctx->input_export_idx, 0, sizeof(ctx->input_export_idx));
    152 	memset(ctx->output_export_idx, 0, sizeof(ctx->output_export_idx));
    153 
    154 	/* do first pass to extract declarations: */
    155 	while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
    156 		tgsi_parse_token(&ctx->parser);
    157 
    158 		switch (ctx->parser.FullToken.Token.Type) {
    159 		case TGSI_TOKEN_TYPE_DECLARATION: {
    160 			struct tgsi_full_declaration *decl =
    161 					&ctx->parser.FullToken.FullDeclaration;
    162 			if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
    163 				unsigned name = decl->Semantic.Name;
    164 
    165 				assert(decl->Declaration.Semantic);  // TODO is this ever not true?
    166 
    167 				ctx->output_export_idx[decl->Range.First] =
    168 						semantic_idx(&decl->Semantic);
    169 
    170 				if (ctx->type == PIPE_SHADER_VERTEX) {
    171 					switch (name) {
    172 					case TGSI_SEMANTIC_POSITION:
    173 						ctx->position = ctx->num_regs[TGSI_FILE_OUTPUT];
    174 						ctx->num_position++;
    175 						break;
    176 					case TGSI_SEMANTIC_PSIZE:
    177 						ctx->psize = ctx->num_regs[TGSI_FILE_OUTPUT];
    178 						ctx->num_position++;
    179 						break;
    180 					case TGSI_SEMANTIC_COLOR:
    181 					case TGSI_SEMANTIC_GENERIC:
    182 						ctx->num_param++;
    183 						break;
    184 					default:
    185 						DBG("unknown VS semantic name: %s",
    186 								tgsi_semantic_names[name]);
    187 						assert(0);
    188 					}
    189 				} else {
    190 					switch (name) {
    191 					case TGSI_SEMANTIC_COLOR:
    192 					case TGSI_SEMANTIC_GENERIC:
    193 						ctx->num_param++;
    194 						break;
    195 					default:
    196 						DBG("unknown PS semantic name: %s",
    197 								tgsi_semantic_names[name]);
    198 						assert(0);
    199 					}
    200 				}
    201 			} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
    202 				ctx->input_export_idx[decl->Range.First] =
    203 						semantic_idx(&decl->Semantic);
    204 			}
    205 			ctx->num_regs[decl->Declaration.File] =
    206 					MAX2(ctx->num_regs[decl->Declaration.File], decl->Range.Last + 1);
    207 			break;
    208 		}
    209 		case TGSI_TOKEN_TYPE_IMMEDIATE: {
    210 			struct tgsi_full_immediate *imm =
    211 					&ctx->parser.FullToken.FullImmediate;
    212 			unsigned n = ctx->so->num_immediates++;
    213 			memcpy(ctx->so->immediates[n].val, imm->u, 16);
    214 			break;
    215 		}
    216 		default:
    217 			break;
    218 		}
    219 	}
    220 
    221 	/* TGSI generated immediates are always entire vec4's, ones we
    222 	 * generate internally are not:
    223 	 */
    224 	ctx->immediate_idx = ctx->so->num_immediates * 4;
    225 
    226 	ctx->so->first_immediate = ctx->num_regs[TGSI_FILE_CONSTANT];
    227 
    228 	tgsi_parse_free(&ctx->parser);
    229 
    230 	return tgsi_parse_init(&ctx->parser, so->tokens);
    231 }
    232 
    233 static void
    234 compile_free(struct fd2_compile_context *ctx)
    235 {
    236 	tgsi_parse_free(&ctx->parser);
    237 }
    238 
    239 static struct ir2_cf *
    240 next_exec_cf(struct fd2_compile_context *ctx)
    241 {
    242 	struct ir2_cf *cf = ctx->cf;
    243 	if (!cf || cf->exec.instrs_count >= ARRAY_SIZE(ctx->cf->exec.instrs))
    244 		ctx->cf = cf = ir2_cf_create(ctx->so->ir, EXEC);
    245 	return cf;
    246 }
    247 
    248 static void
    249 compile_vtx_fetch(struct fd2_compile_context *ctx)
    250 {
    251 	struct ir2_instruction **vfetch_instrs = ctx->so->vfetch_instrs;
    252 	int i;
    253 	for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) {
    254 		struct ir2_instruction *instr = ir2_instr_create(
    255 				next_exec_cf(ctx), IR2_FETCH);
    256 		instr->fetch.opc = VTX_FETCH;
    257 
    258 		ctx->need_sync |= 1 << (i+1);
    259 
    260 		ir2_reg_create(instr, i+1, "xyzw", 0);
    261 		ir2_reg_create(instr, 0, "x", 0);
    262 
    263 		if (i == 0)
    264 			instr->sync = true;
    265 
    266 		vfetch_instrs[i] = instr;
    267 	}
    268 	ctx->so->num_vfetch_instrs = i;
    269 	ctx->cf = NULL;
    270 }
    271 
    272 /*
    273  * For vertex shaders (VS):
    274  * --- ------ -------------
    275  *
    276  *   Inputs:     R1-R(num_input)
    277  *   Constants:  C0-C(num_const-1)
    278  *   Immediates: C(num_const)-C(num_const+num_imm-1)
    279  *   Outputs:    export0-export(n) and export62, export63
    280  *      n is # of outputs minus gl_Position (export62) and gl_PointSize (export63)
    281  *   Temps:      R(num_input+1)-R(num_input+num_temps)
    282  *
    283  * R0 could be clobbered after the vertex fetch instructions.. so we
    284  * could use it for one of the temporaries.
    285  *
    286  * TODO: maybe the vertex fetch part could fetch first input into R0 as
    287  * the last vtx fetch instruction, which would let us use the same
    288  * register layout in either case.. although this is not what the blob
    289  * compiler does.
    290  *
    291  *
    292  * For frag shaders (PS):
    293  * --- ---- -------------
    294  *
    295  *   Inputs:     R0-R(num_input-1)
    296  *   Constants:  same as VS
    297  *   Immediates: same as VS
    298  *   Outputs:    export0-export(num_outputs)
    299  *   Temps:      R(num_input)-R(num_input+num_temps-1)
    300  *
    301  * In either case, immediates are postpended to the constants
    302  * (uniforms).
    303  *
    304  */
    305 
    306 static unsigned
    307 get_temp_gpr(struct fd2_compile_context *ctx, int idx)
    308 {
    309 	unsigned num = idx + ctx->num_regs[TGSI_FILE_INPUT];
    310 	if (ctx->type == PIPE_SHADER_VERTEX)
    311 		num++;
    312 	return num;
    313 }
    314 
    315 static struct ir2_register *
    316 add_dst_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu,
    317 		const struct tgsi_dst_register *dst)
    318 {
    319 	unsigned flags = 0, num = 0;
    320 	char swiz[5];
    321 
    322 	switch (dst->File) {
    323 	case TGSI_FILE_OUTPUT:
    324 		flags |= IR2_REG_EXPORT;
    325 		if (ctx->type == PIPE_SHADER_VERTEX) {
    326 			if (dst->Index == ctx->position) {
    327 				num = 62;
    328 			} else if (dst->Index == ctx->psize) {
    329 				num = 63;
    330 			} else {
    331 				num = export_linkage(ctx,
    332 						ctx->output_export_idx[dst->Index]);
    333 			}
    334 		} else {
    335 			num = dst->Index;
    336 		}
    337 		break;
    338 	case TGSI_FILE_TEMPORARY:
    339 		num = get_temp_gpr(ctx, dst->Index);
    340 		break;
    341 	default:
    342 		DBG("unsupported dst register file: %s",
    343 			tgsi_file_name(dst->File));
    344 		assert(0);
    345 		break;
    346 	}
    347 
    348 	swiz[0] = (dst->WriteMask & TGSI_WRITEMASK_X) ? 'x' : '_';
    349 	swiz[1] = (dst->WriteMask & TGSI_WRITEMASK_Y) ? 'y' : '_';
    350 	swiz[2] = (dst->WriteMask & TGSI_WRITEMASK_Z) ? 'z' : '_';
    351 	swiz[3] = (dst->WriteMask & TGSI_WRITEMASK_W) ? 'w' : '_';
    352 	swiz[4] = '\0';
    353 
    354 	return ir2_reg_create(alu, num, swiz, flags);
    355 }
    356 
    357 static struct ir2_register *
    358 add_src_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu,
    359 		const struct tgsi_src_register *src)
    360 {
    361 	static const char swiz_vals[] = {
    362 			'x', 'y', 'z', 'w',
    363 	};
    364 	char swiz[5];
    365 	unsigned flags = 0, num = 0;
    366 
    367 	switch (src->File) {
    368 	case TGSI_FILE_CONSTANT:
    369 		num = src->Index;
    370 		flags |= IR2_REG_CONST;
    371 		break;
    372 	case TGSI_FILE_INPUT:
    373 		if (ctx->type == PIPE_SHADER_VERTEX) {
    374 			num = src->Index + 1;
    375 		} else {
    376 			num = export_linkage(ctx,
    377 					ctx->input_export_idx[src->Index]);
    378 		}
    379 		break;
    380 	case TGSI_FILE_TEMPORARY:
    381 		num = get_temp_gpr(ctx, src->Index);
    382 		break;
    383 	case TGSI_FILE_IMMEDIATE:
    384 		num = src->Index + ctx->num_regs[TGSI_FILE_CONSTANT];
    385 		flags |= IR2_REG_CONST;
    386 		break;
    387 	default:
    388 		DBG("unsupported src register file: %s",
    389 			tgsi_file_name(src->File));
    390 		assert(0);
    391 		break;
    392 	}
    393 
    394 	if (src->Absolute)
    395 		flags |= IR2_REG_ABS;
    396 	if (src->Negate)
    397 		flags |= IR2_REG_NEGATE;
    398 
    399 	swiz[0] = swiz_vals[src->SwizzleX];
    400 	swiz[1] = swiz_vals[src->SwizzleY];
    401 	swiz[2] = swiz_vals[src->SwizzleZ];
    402 	swiz[3] = swiz_vals[src->SwizzleW];
    403 	swiz[4] = '\0';
    404 
    405 	if ((ctx->need_sync & ((uint64_t)1 << num)) &&
    406 			!(flags & IR2_REG_CONST)) {
    407 		alu->sync = true;
    408 		ctx->need_sync &= ~((uint64_t)1 << num);
    409 	}
    410 
    411 	return ir2_reg_create(alu, num, swiz, flags);
    412 }
    413 
    414 static void
    415 add_vector_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
    416 {
    417 	if (inst->Instruction.Saturate) {
    418 		alu->alu.vector_clamp = true;
    419 	}
    420 }
    421 
    422 static void
    423 add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
    424 {
    425 	if (inst->Instruction.Saturate) {
    426 		alu->alu.scalar_clamp = true;
    427 	}
    428 }
    429 
    430 static void
    431 add_regs_vector_1(struct fd2_compile_context *ctx,
    432 		struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
    433 {
    434 	assert(inst->Instruction.NumSrcRegs == 1);
    435 	assert(inst->Instruction.NumDstRegs == 1);
    436 
    437 	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
    438 	add_src_reg(ctx, alu, &inst->Src[0].Register);
    439 	add_src_reg(ctx, alu, &inst->Src[0].Register);
    440 	add_vector_clamp(inst, alu);
    441 }
    442 
    443 static void
    444 add_regs_vector_2(struct fd2_compile_context *ctx,
    445 		struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
    446 {
    447 	assert(inst->Instruction.NumSrcRegs == 2);
    448 	assert(inst->Instruction.NumDstRegs == 1);
    449 
    450 	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
    451 	add_src_reg(ctx, alu, &inst->Src[0].Register);
    452 	add_src_reg(ctx, alu, &inst->Src[1].Register);
    453 	add_vector_clamp(inst, alu);
    454 }
    455 
    456 static void
    457 add_regs_vector_3(struct fd2_compile_context *ctx,
    458 		struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
    459 {
    460 	assert(inst->Instruction.NumSrcRegs == 3);
    461 	assert(inst->Instruction.NumDstRegs == 1);
    462 
    463 	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
    464 	/* maybe should re-arrange the syntax some day, but
    465 	 * in assembler/disassembler and what ir.c expects
    466 	 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1
    467 	 */
    468 	add_src_reg(ctx, alu, &inst->Src[2].Register);
    469 	add_src_reg(ctx, alu, &inst->Src[0].Register);
    470 	add_src_reg(ctx, alu, &inst->Src[1].Register);
    471 	add_vector_clamp(inst, alu);
    472 }
    473 
    474 static void
    475 add_regs_dummy_vector(struct ir2_instruction *alu)
    476 {
    477 	/* create dummy, non-written vector dst/src regs
    478 	 * for unused vector instr slot:
    479 	 */
    480 	ir2_reg_create(alu, 0, "____", 0); /* vector dst */
    481 	ir2_reg_create(alu, 0, NULL, 0);   /* vector src1 */
    482 	ir2_reg_create(alu, 0, NULL, 0);   /* vector src2 */
    483 }
    484 
    485 static void
    486 add_regs_scalar_1(struct fd2_compile_context *ctx,
    487 		struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
    488 {
    489 	assert(inst->Instruction.NumSrcRegs == 1);
    490 	assert(inst->Instruction.NumDstRegs == 1);
    491 
    492 	add_regs_dummy_vector(alu);
    493 
    494 	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
    495 	add_src_reg(ctx, alu, &inst->Src[0].Register);
    496 	add_scalar_clamp(inst, alu);
    497 }
    498 
    499 /*
    500  * Helpers for TGSI instructions that don't map to a single shader instr:
    501  */
    502 
    503 static void
    504 src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
    505 {
    506 	src->File      = dst->File;
    507 	src->Indirect  = dst->Indirect;
    508 	src->Dimension = dst->Dimension;
    509 	src->Index     = dst->Index;
    510 	src->Absolute  = 0;
    511 	src->Negate    = 0;
    512 	src->SwizzleX  = TGSI_SWIZZLE_X;
    513 	src->SwizzleY  = TGSI_SWIZZLE_Y;
    514 	src->SwizzleZ  = TGSI_SWIZZLE_Z;
    515 	src->SwizzleW  = TGSI_SWIZZLE_W;
    516 }
    517 
    518 /* Get internal-temp src/dst to use for a sequence of instructions
    519  * generated by a single TGSI op.
    520  */
    521 static void
    522 get_internal_temp(struct fd2_compile_context *ctx,
    523 		struct tgsi_dst_register *tmp_dst,
    524 		struct tgsi_src_register *tmp_src)
    525 {
    526 	int n;
    527 
    528 	tmp_dst->File      = TGSI_FILE_TEMPORARY;
    529 	tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
    530 	tmp_dst->Indirect  = 0;
    531 	tmp_dst->Dimension = 0;
    532 
    533 	/* assign next temporary: */
    534 	n = ctx->num_internal_temps++;
    535 	if (ctx->pred_reg != -1)
    536 		n++;
    537 
    538 	tmp_dst->Index = ctx->num_regs[TGSI_FILE_TEMPORARY] + n;
    539 
    540 	src_from_dst(tmp_src, tmp_dst);
    541 }
    542 
    543 static void
    544 get_predicate(struct fd2_compile_context *ctx, struct tgsi_dst_register *dst,
    545 		struct tgsi_src_register *src)
    546 {
    547 	assert(ctx->pred_reg != -1);
    548 
    549 	dst->File      = TGSI_FILE_TEMPORARY;
    550 	dst->WriteMask = TGSI_WRITEMASK_W;
    551 	dst->Indirect  = 0;
    552 	dst->Dimension = 0;
    553 	dst->Index     = get_temp_gpr(ctx, ctx->pred_reg);
    554 
    555 	if (src) {
    556 		src_from_dst(src, dst);
    557 		src->SwizzleX  = TGSI_SWIZZLE_W;
    558 		src->SwizzleY  = TGSI_SWIZZLE_W;
    559 		src->SwizzleZ  = TGSI_SWIZZLE_W;
    560 		src->SwizzleW  = TGSI_SWIZZLE_W;
    561 	}
    562 }
    563 
    564 static void
    565 push_predicate(struct fd2_compile_context *ctx, struct tgsi_src_register *src)
    566 {
    567 	struct ir2_instruction *alu;
    568 	struct tgsi_dst_register pred_dst;
    569 
    570 	/* NOTE blob compiler seems to always puts PRED_* instrs in a CF by
    571 	 * themselves:
    572 	 */
    573 	ctx->cf = NULL;
    574 
    575 	if (ctx->pred_depth == 0) {
    576 		/* assign predicate register: */
    577 		ctx->pred_reg = ctx->num_regs[TGSI_FILE_TEMPORARY];
    578 
    579 		get_predicate(ctx, &pred_dst, NULL);
    580 
    581 		alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SETNEs);
    582 		add_regs_dummy_vector(alu);
    583 		add_dst_reg(ctx, alu, &pred_dst);
    584 		add_src_reg(ctx, alu, src);
    585 	} else {
    586 		struct tgsi_src_register pred_src;
    587 
    588 		get_predicate(ctx, &pred_dst, &pred_src);
    589 
    590 		alu = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
    591 		add_dst_reg(ctx, alu, &pred_dst);
    592 		add_src_reg(ctx, alu, &pred_src);
    593 		add_src_reg(ctx, alu, src);
    594 
    595 		// XXX need to make PRED_SETE_PUSHv IR2_PRED_NONE.. but need to make
    596 		// sure src reg is valid if it was calculated with a predicate
    597 		// condition..
    598 		alu->pred = IR2_PRED_NONE;
    599 	}
    600 
    601 	/* save previous pred state to restore in pop_predicate(): */
    602 	ctx->pred_stack[ctx->pred_depth++] = ctx->so->ir->pred;
    603 
    604 	ctx->cf = NULL;
    605 }
    606 
    607 static void
    608 pop_predicate(struct fd2_compile_context *ctx)
    609 {
    610 	/* NOTE blob compiler seems to always puts PRED_* instrs in a CF by
    611 	 * themselves:
    612 	 */
    613 	ctx->cf = NULL;
    614 
    615 	/* restore previous predicate state: */
    616 	ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth];
    617 
    618 	if (ctx->pred_depth != 0) {
    619 		struct ir2_instruction *alu;
    620 		struct tgsi_dst_register pred_dst;
    621 		struct tgsi_src_register pred_src;
    622 
    623 		get_predicate(ctx, &pred_dst, &pred_src);
    624 
    625 		alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SET_POPs);
    626 		add_regs_dummy_vector(alu);
    627 		add_dst_reg(ctx, alu, &pred_dst);
    628 		add_src_reg(ctx, alu, &pred_src);
    629 		alu->pred = IR2_PRED_NONE;
    630 	} else {
    631 		/* predicate register no longer needed: */
    632 		ctx->pred_reg = -1;
    633 	}
    634 
    635 	ctx->cf = NULL;
    636 }
    637 
    638 static void
    639 get_immediate(struct fd2_compile_context *ctx,
    640 		struct tgsi_src_register *reg, uint32_t val)
    641 {
    642 	unsigned neg, swiz, idx, i;
    643 	/* actually maps 1:1 currently.. not sure if that is safe to rely on: */
    644 	static const unsigned swiz2tgsi[] = {
    645 			TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
    646 	};
    647 
    648 	for (i = 0; i < ctx->immediate_idx; i++) {
    649 		swiz = i % 4;
    650 		idx  = i / 4;
    651 
    652 		if (ctx->so->immediates[idx].val[swiz] == val) {
    653 			neg = 0;
    654 			break;
    655 		}
    656 
    657 		if (ctx->so->immediates[idx].val[swiz] == -val) {
    658 			neg = 1;
    659 			break;
    660 		}
    661 	}
    662 
    663 	if (i == ctx->immediate_idx) {
    664 		/* need to generate a new immediate: */
    665 		swiz = i % 4;
    666 		idx  = i / 4;
    667 		neg  = 0;
    668 		ctx->so->immediates[idx].val[swiz] = val;
    669 		ctx->so->num_immediates = idx + 1;
    670 		ctx->immediate_idx++;
    671 	}
    672 
    673 	reg->File      = TGSI_FILE_IMMEDIATE;
    674 	reg->Indirect  = 0;
    675 	reg->Dimension = 0;
    676 	reg->Index     = idx;
    677 	reg->Absolute  = 0;
    678 	reg->Negate    = neg;
    679 	reg->SwizzleX  = swiz2tgsi[swiz];
    680 	reg->SwizzleY  = swiz2tgsi[swiz];
    681 	reg->SwizzleZ  = swiz2tgsi[swiz];
    682 	reg->SwizzleW  = swiz2tgsi[swiz];
    683 }
    684 
    685 /* POW(a,b) = EXP2(b * LOG2(a)) */
    686 static void
    687 translate_pow(struct fd2_compile_context *ctx,
    688 		struct tgsi_full_instruction *inst)
    689 {
    690 	struct tgsi_dst_register tmp_dst;
    691 	struct tgsi_src_register tmp_src;
    692 	struct ir2_instruction *alu;
    693 
    694 	get_internal_temp(ctx, &tmp_dst, &tmp_src);
    695 
    696 	alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, LOG_CLAMP);
    697 	add_regs_dummy_vector(alu);
    698 	add_dst_reg(ctx, alu, &tmp_dst);
    699 	add_src_reg(ctx, alu, &inst->Src[0].Register);
    700 
    701 	alu = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
    702 	add_dst_reg(ctx, alu, &tmp_dst);
    703 	add_src_reg(ctx, alu, &tmp_src);
    704 	add_src_reg(ctx, alu, &inst->Src[1].Register);
    705 
    706 	/* NOTE: some of the instructions, like EXP_IEEE, seem hard-
    707 	 * coded to take their input from the w component.
    708 	 */
    709 	switch(inst->Dst[0].Register.WriteMask) {
    710 	case TGSI_WRITEMASK_X:
    711 		tmp_src.SwizzleW = TGSI_SWIZZLE_X;
    712 		break;
    713 	case TGSI_WRITEMASK_Y:
    714 		tmp_src.SwizzleW = TGSI_SWIZZLE_Y;
    715 		break;
    716 	case TGSI_WRITEMASK_Z:
    717 		tmp_src.SwizzleW = TGSI_SWIZZLE_Z;
    718 		break;
    719 	case TGSI_WRITEMASK_W:
    720 		tmp_src.SwizzleW = TGSI_SWIZZLE_W;
    721 		break;
    722 	default:
    723 		DBG("invalid writemask!");
    724 		assert(0);
    725 		break;
    726 	}
    727 
    728 	alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, EXP_IEEE);
    729 	add_regs_dummy_vector(alu);
    730 	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
    731 	add_src_reg(ctx, alu, &tmp_src);
    732 	add_scalar_clamp(inst, alu);
    733 }
    734 
    735 static void
    736 translate_tex(struct fd2_compile_context *ctx,
    737 		struct tgsi_full_instruction *inst, unsigned opc)
    738 {
    739 	struct ir2_instruction *instr;
    740 	struct ir2_register *reg;
    741 	struct tgsi_dst_register tmp_dst;
    742 	struct tgsi_src_register tmp_src;
    743 	const struct tgsi_src_register *coord;
    744 	bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) ||
    745 			inst->Instruction.Saturate;
    746 	int idx;
    747 
    748 	if (using_temp || (opc == TGSI_OPCODE_TXP))
    749 		get_internal_temp(ctx, &tmp_dst, &tmp_src);
    750 
    751 	if (opc == TGSI_OPCODE_TXP) {
    752 		static const char *swiz[] = {
    753 				[TGSI_SWIZZLE_X] = "xxxx",
    754 				[TGSI_SWIZZLE_Y] = "yyyy",
    755 				[TGSI_SWIZZLE_Z] = "zzzz",
    756 				[TGSI_SWIZZLE_W] = "wwww",
    757 		};
    758 
    759 		/* TXP - Projective Texture Lookup:
    760 		 *
    761 		 *  coord.x = src0.x / src.w
    762 		 *  coord.y = src0.y / src.w
    763 		 *  coord.z = src0.z / src.w
    764 		 *  coord.w = src0.w
    765 		 *  bias = 0.0
    766 		 *
    767 		 *  dst = texture_sample(unit, coord, bias)
    768 		 */
    769 		instr = ir2_instr_create_alu(next_exec_cf(ctx), MAXv, RECIP_IEEE);
    770 
    771 		/* MAXv: */
    772 		add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w";
    773 		add_src_reg(ctx, instr, &inst->Src[0].Register);
    774 		add_src_reg(ctx, instr, &inst->Src[0].Register);
    775 
    776 		/* RECIP_IEEE: */
    777 		add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___";
    778 		add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle =
    779 				swiz[inst->Src[0].Register.SwizzleW];
    780 
    781 		instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
    782 		add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_";
    783 		add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx";
    784 		add_src_reg(ctx, instr, &inst->Src[0].Register);
    785 
    786 		coord = &tmp_src;
    787 	} else {
    788 		coord = &inst->Src[0].Register;
    789 	}
    790 
    791 	instr = ir2_instr_create(next_exec_cf(ctx), IR2_FETCH);
    792 	instr->fetch.opc = TEX_FETCH;
    793 	instr->fetch.is_cube = (inst->Texture.Texture == TGSI_TEXTURE_3D);
    794 	assert(inst->Texture.NumOffsets <= 1); // TODO what to do in other cases?
    795 
    796 	/* save off the tex fetch to be patched later with correct const_idx: */
    797 	idx = ctx->so->num_tfetch_instrs++;
    798 	ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index;
    799 	ctx->so->tfetch_instrs[idx].instr = instr;
    800 
    801 	add_dst_reg(ctx, instr, using_temp ? &tmp_dst : &inst->Dst[0].Register);
    802 	reg = add_src_reg(ctx, instr, coord);
    803 
    804 	/* blob compiler always sets 3rd component to same as 1st for 2d: */
    805 	if (inst->Texture.Texture == TGSI_TEXTURE_2D)
    806 		reg->swizzle[2] = reg->swizzle[0];
    807 
    808 	/* dst register needs to be marked for sync: */
    809 	ctx->need_sync |= 1 << instr->regs[0]->num;
    810 
    811 	/* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */
    812 	instr->sync = true;
    813 
    814 	if (using_temp) {
    815 		/* texture fetch can't write directly to export, so if tgsi
    816 		 * is telling us the dst register is in output file, we load
    817 		 * the texture to a temp and the use ALU instruction to move
    818 		 * to output
    819 		 */
    820 		instr = ir2_instr_create_alu(next_exec_cf(ctx), MAXv, ~0);
    821 
    822 		add_dst_reg(ctx, instr, &inst->Dst[0].Register);
    823 		add_src_reg(ctx, instr, &tmp_src);
    824 		add_src_reg(ctx, instr, &tmp_src);
    825 		add_vector_clamp(inst, instr);
    826 	}
    827 }
    828 
    829 /* SGE(a,b) = GTE((b - a), 1.0, 0.0) */
    830 /* SLT(a,b) = GTE((b - a), 0.0, 1.0) */
    831 static void
    832 translate_sge_slt(struct fd2_compile_context *ctx,
    833 		struct tgsi_full_instruction *inst, unsigned opc)
    834 {
    835 	struct ir2_instruction *instr;
    836 	struct tgsi_dst_register tmp_dst;
    837 	struct tgsi_src_register tmp_src;
    838 	struct tgsi_src_register tmp_const;
    839 	float c0, c1;
    840 
    841 	switch (opc) {
    842 	default:
    843 		assert(0);
    844 	case TGSI_OPCODE_SGE:
    845 		c0 = 1.0;
    846 		c1 = 0.0;
    847 		break;
    848 	case TGSI_OPCODE_SLT:
    849 		c0 = 0.0;
    850 		c1 = 1.0;
    851 		break;
    852 	}
    853 
    854 	get_internal_temp(ctx, &tmp_dst, &tmp_src);
    855 
    856 	instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
    857 	add_dst_reg(ctx, instr, &tmp_dst);
    858 	add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE;
    859 	add_src_reg(ctx, instr, &inst->Src[1].Register);
    860 
    861 	instr = ir2_instr_create_alu(next_exec_cf(ctx), CNDGTEv, ~0);
    862 	add_dst_reg(ctx, instr, &inst->Dst[0].Register);
    863 	/* maybe should re-arrange the syntax some day, but
    864 	 * in assembler/disassembler and what ir.c expects
    865 	 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1
    866 	 */
    867 	get_immediate(ctx, &tmp_const, fui(c0));
    868 	add_src_reg(ctx, instr, &tmp_const);
    869 	add_src_reg(ctx, instr, &tmp_src);
    870 	get_immediate(ctx, &tmp_const, fui(c1));
    871 	add_src_reg(ctx, instr, &tmp_const);
    872 }
    873 
    874 /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
    875 static void
    876 translate_lrp(struct fd2_compile_context *ctx,
    877 		struct tgsi_full_instruction *inst,
    878 		unsigned opc)
    879 {
    880 	struct ir2_instruction *instr;
    881 	struct tgsi_dst_register tmp_dst1, tmp_dst2;
    882 	struct tgsi_src_register tmp_src1, tmp_src2;
    883 	struct tgsi_src_register tmp_const;
    884 
    885 	get_internal_temp(ctx, &tmp_dst1, &tmp_src1);
    886 	get_internal_temp(ctx, &tmp_dst2, &tmp_src2);
    887 
    888 	get_immediate(ctx, &tmp_const, fui(1.0));
    889 
    890 	/* tmp1 = (a * b) */
    891 	instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
    892 	add_dst_reg(ctx, instr, &tmp_dst1);
    893 	add_src_reg(ctx, instr, &inst->Src[0].Register);
    894 	add_src_reg(ctx, instr, &inst->Src[1].Register);
    895 
    896 	/* tmp2 = (1 - a) */
    897 	instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
    898 	add_dst_reg(ctx, instr, &tmp_dst2);
    899 	add_src_reg(ctx, instr, &tmp_const);
    900 	add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE;
    901 
    902 	/* tmp2 = tmp2 * c */
    903 	instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
    904 	add_dst_reg(ctx, instr, &tmp_dst2);
    905 	add_src_reg(ctx, instr, &tmp_src2);
    906 	add_src_reg(ctx, instr, &inst->Src[2].Register);
    907 
    908 	/* dst = tmp1 + tmp2 */
    909 	instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
    910 	add_dst_reg(ctx, instr, &inst->Dst[0].Register);
    911 	add_src_reg(ctx, instr, &tmp_src1);
    912 	add_src_reg(ctx, instr, &tmp_src2);
    913 }
    914 
    915 static void
    916 translate_trig(struct fd2_compile_context *ctx,
    917 		struct tgsi_full_instruction *inst,
    918 		unsigned opc)
    919 {
    920 	struct ir2_instruction *instr;
    921 	struct tgsi_dst_register tmp_dst;
    922 	struct tgsi_src_register tmp_src;
    923 	struct tgsi_src_register tmp_const;
    924 	instr_scalar_opc_t op;
    925 
    926 	switch (opc) {
    927 	default:
    928 		assert(0);
    929 	case TGSI_OPCODE_SIN:
    930 		op = SIN;
    931 		break;
    932 	case TGSI_OPCODE_COS:
    933 		op = COS;
    934 		break;
    935 	}
    936 
    937 	get_internal_temp(ctx, &tmp_dst, &tmp_src);
    938 
    939 	tmp_dst.WriteMask = TGSI_WRITEMASK_X;
    940 	tmp_src.SwizzleX = tmp_src.SwizzleY =
    941 			tmp_src.SwizzleZ = tmp_src.SwizzleW = TGSI_SWIZZLE_X;
    942 
    943 	/* maybe should re-arrange the syntax some day, but
    944 	 * in assembler/disassembler and what ir.c expects
    945 	 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1
    946 	 */
    947 	instr = ir2_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0);
    948 	add_dst_reg(ctx, instr, &tmp_dst);
    949 	get_immediate(ctx, &tmp_const, fui(0.5));
    950 	add_src_reg(ctx, instr, &tmp_const);
    951 	add_src_reg(ctx, instr, &inst->Src[0].Register);
    952 	get_immediate(ctx, &tmp_const, fui(0.159155));
    953 	add_src_reg(ctx, instr, &tmp_const);
    954 
    955 	instr = ir2_instr_create_alu(next_exec_cf(ctx), FRACv, ~0);
    956 	add_dst_reg(ctx, instr, &tmp_dst);
    957 	add_src_reg(ctx, instr, &tmp_src);
    958 	add_src_reg(ctx, instr, &tmp_src);
    959 
    960 	instr = ir2_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0);
    961 	add_dst_reg(ctx, instr, &tmp_dst);
    962 	get_immediate(ctx, &tmp_const, fui(-3.141593));
    963 	add_src_reg(ctx, instr, &tmp_const);
    964 	add_src_reg(ctx, instr, &tmp_src);
    965 	get_immediate(ctx, &tmp_const, fui(6.283185));
    966 	add_src_reg(ctx, instr, &tmp_const);
    967 
    968 	instr = ir2_instr_create_alu(next_exec_cf(ctx), ~0, op);
    969 	add_regs_dummy_vector(instr);
    970 	add_dst_reg(ctx, instr, &inst->Dst[0].Register);
    971 	add_src_reg(ctx, instr, &tmp_src);
    972 }
    973 
    974 /*
    975  * Main part of compiler/translator:
    976  */
    977 
    978 static void
    979 translate_instruction(struct fd2_compile_context *ctx,
    980 		struct tgsi_full_instruction *inst)
    981 {
    982 	unsigned opc = inst->Instruction.Opcode;
    983 	struct ir2_instruction *instr;
    984 	static struct ir2_cf *cf;
    985 
    986 	if (opc == TGSI_OPCODE_END)
    987 		return;
    988 
    989 	if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
    990 		unsigned num = inst->Dst[0].Register.Index;
    991 		/* seems like we need to ensure that position vs param/pixel
    992 		 * exports don't end up in the same EXEC clause..  easy way
    993 		 * to do this is force a new EXEC clause on first appearance
    994 		 * of an position or param/pixel export.
    995 		 */
    996 		if ((num == ctx->position) || (num == ctx->psize)) {
    997 			if (ctx->num_position > 0) {
    998 				ctx->cf = NULL;
    999 				ir2_cf_create_alloc(ctx->so->ir, SQ_POSITION,
   1000 						ctx->num_position - 1);
   1001 				ctx->num_position = 0;
   1002 			}
   1003 		} else {
   1004 			if (ctx->num_param > 0) {
   1005 				ctx->cf = NULL;
   1006 				ir2_cf_create_alloc(ctx->so->ir, SQ_PARAMETER_PIXEL,
   1007 						ctx->num_param - 1);
   1008 				ctx->num_param = 0;
   1009 			}
   1010 		}
   1011 	}
   1012 
   1013 	cf = next_exec_cf(ctx);
   1014 
   1015 	/* TODO turn this into a table: */
   1016 	switch (opc) {
   1017 	case TGSI_OPCODE_MOV:
   1018 		instr = ir2_instr_create_alu(cf, MAXv, ~0);
   1019 		add_regs_vector_1(ctx, inst, instr);
   1020 		break;
   1021 	case TGSI_OPCODE_RCP:
   1022 		instr = ir2_instr_create_alu(cf, ~0, RECIP_IEEE);
   1023 		add_regs_scalar_1(ctx, inst, instr);
   1024 		break;
   1025 	case TGSI_OPCODE_RSQ:
   1026 		instr = ir2_instr_create_alu(cf, ~0, RECIPSQ_IEEE);
   1027 		add_regs_scalar_1(ctx, inst, instr);
   1028 		break;
   1029 	case TGSI_OPCODE_SQRT:
   1030 		instr = ir2_instr_create_alu(cf, ~0, SQRT_IEEE);
   1031 		add_regs_scalar_1(ctx, inst, instr);
   1032 		break;
   1033 	case TGSI_OPCODE_MUL:
   1034 		instr = ir2_instr_create_alu(cf, MULv, ~0);
   1035 		add_regs_vector_2(ctx, inst, instr);
   1036 		break;
   1037 	case TGSI_OPCODE_ADD:
   1038 		instr = ir2_instr_create_alu(cf, ADDv, ~0);
   1039 		add_regs_vector_2(ctx, inst, instr);
   1040 		break;
   1041 	case TGSI_OPCODE_DP3:
   1042 		instr = ir2_instr_create_alu(cf, DOT3v, ~0);
   1043 		add_regs_vector_2(ctx, inst, instr);
   1044 		break;
   1045 	case TGSI_OPCODE_DP4:
   1046 		instr = ir2_instr_create_alu(cf, DOT4v, ~0);
   1047 		add_regs_vector_2(ctx, inst, instr);
   1048 		break;
   1049 	case TGSI_OPCODE_MIN:
   1050 		instr = ir2_instr_create_alu(cf, MINv, ~0);
   1051 		add_regs_vector_2(ctx, inst, instr);
   1052 		break;
   1053 	case TGSI_OPCODE_MAX:
   1054 		instr = ir2_instr_create_alu(cf, MAXv, ~0);
   1055 		add_regs_vector_2(ctx, inst, instr);
   1056 		break;
   1057 	case TGSI_OPCODE_SLT:
   1058 	case TGSI_OPCODE_SGE:
   1059 		translate_sge_slt(ctx, inst, opc);
   1060 		break;
   1061 	case TGSI_OPCODE_MAD:
   1062 		instr = ir2_instr_create_alu(cf, MULADDv, ~0);
   1063 		add_regs_vector_3(ctx, inst, instr);
   1064 		break;
   1065 	case TGSI_OPCODE_LRP:
   1066 		translate_lrp(ctx, inst, opc);
   1067 		break;
   1068 	case TGSI_OPCODE_FRC:
   1069 		instr = ir2_instr_create_alu(cf, FRACv, ~0);
   1070 		add_regs_vector_1(ctx, inst, instr);
   1071 		break;
   1072 	case TGSI_OPCODE_FLR:
   1073 		instr = ir2_instr_create_alu(cf, FLOORv, ~0);
   1074 		add_regs_vector_1(ctx, inst, instr);
   1075 		break;
   1076 	case TGSI_OPCODE_EX2:
   1077 		instr = ir2_instr_create_alu(cf, ~0, EXP_IEEE);
   1078 		add_regs_scalar_1(ctx, inst, instr);
   1079 		break;
   1080 	case TGSI_OPCODE_POW:
   1081 		translate_pow(ctx, inst);
   1082 		break;
   1083 	case TGSI_OPCODE_COS:
   1084 	case TGSI_OPCODE_SIN:
   1085 		translate_trig(ctx, inst, opc);
   1086 		break;
   1087 	case TGSI_OPCODE_TEX:
   1088 	case TGSI_OPCODE_TXP:
   1089 		translate_tex(ctx, inst, opc);
   1090 		break;
   1091 	case TGSI_OPCODE_CMP:
   1092 		instr = ir2_instr_create_alu(cf, CNDGTEv, ~0);
   1093 		add_regs_vector_3(ctx, inst, instr);
   1094 		// TODO this should be src0 if regs where in sane order..
   1095 		instr->regs[2]->flags ^= IR2_REG_NEGATE; /* src1 */
   1096 		break;
   1097 	case TGSI_OPCODE_IF:
   1098 		push_predicate(ctx, &inst->Src[0].Register);
   1099 		ctx->so->ir->pred = IR2_PRED_EQ;
   1100 		break;
   1101 	case TGSI_OPCODE_ELSE:
   1102 		ctx->so->ir->pred = IR2_PRED_NE;
   1103 		/* not sure if this is required in all cases, but blob compiler
   1104 		 * won't combine EQ and NE in same CF:
   1105 		 */
   1106 		ctx->cf = NULL;
   1107 		break;
   1108 	case TGSI_OPCODE_ENDIF:
   1109 		pop_predicate(ctx);
   1110 		break;
   1111 	case TGSI_OPCODE_F2I:
   1112 		instr = ir2_instr_create_alu(cf, TRUNCv, ~0);
   1113 		add_regs_vector_1(ctx, inst, instr);
   1114 		break;
   1115 	default:
   1116 		DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc));
   1117 		tgsi_dump(ctx->so->tokens, 0);
   1118 		assert(0);
   1119 		break;
   1120 	}
   1121 
   1122 	/* internal temporaries are only valid for the duration of a single
   1123 	 * TGSI instruction:
   1124 	 */
   1125 	ctx->num_internal_temps = 0;
   1126 }
   1127 
   1128 static void
   1129 compile_instructions(struct fd2_compile_context *ctx)
   1130 {
   1131 	while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
   1132 		tgsi_parse_token(&ctx->parser);
   1133 
   1134 		switch (ctx->parser.FullToken.Token.Type) {
   1135 		case TGSI_TOKEN_TYPE_INSTRUCTION:
   1136 			translate_instruction(ctx,
   1137 					&ctx->parser.FullToken.FullInstruction);
   1138 			break;
   1139 		default:
   1140 			break;
   1141 		}
   1142 	}
   1143 
   1144 	ctx->cf->cf_type = EXEC_END;
   1145 }
   1146 
   1147 int
   1148 fd2_compile_shader(struct fd_program_stateobj *prog,
   1149 		struct fd2_shader_stateobj *so)
   1150 {
   1151 	struct fd2_compile_context ctx;
   1152 
   1153 	ir2_shader_destroy(so->ir);
   1154 	so->ir = ir2_shader_create();
   1155 	so->num_vfetch_instrs = so->num_tfetch_instrs = so->num_immediates = 0;
   1156 
   1157 	if (compile_init(&ctx, prog, so) != TGSI_PARSE_OK)
   1158 		return -1;
   1159 
   1160 	if (ctx.type == PIPE_SHADER_VERTEX) {
   1161 		compile_vtx_fetch(&ctx);
   1162 	} else if (ctx.type == PIPE_SHADER_FRAGMENT) {
   1163 		prog->num_exports = 0;
   1164 		memset(prog->export_linkage, 0xff,
   1165 				sizeof(prog->export_linkage));
   1166 	}
   1167 
   1168 	compile_instructions(&ctx);
   1169 
   1170 	compile_free(&ctx);
   1171 
   1172 	return 0;
   1173 }
   1174 
   1175