Home | History | Annotate | Download | only in compiler
      1 /*
      2  * Copyright 2012 Advanced Micro Devices, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Author: Tom Stellard <thomas.stellard (at) amd.com>
     24  */
     25 
     26 #include "radeon_compiler.h"
     27 #include "radeon_compiler_util.h"
     28 #include "radeon_dataflow.h"
     29 #include "radeon_program.h"
     30 #include "radeon_program_constants.h"
     31 
     32 struct vert_fc_state {
     33 	struct radeon_compiler *C;
     34 	unsigned BranchDepth;
     35 	unsigned LoopDepth;
     36 	unsigned LoopsReserved;
     37 	int PredStack[R500_PVS_MAX_LOOP_DEPTH];
     38 	int PredicateReg;
     39 	unsigned InCFBreak;
     40 };
     41 
     42 static void build_pred_src(
     43 	struct rc_src_register * src,
     44 	struct vert_fc_state * fc_state)
     45 {
     46 	src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
     47 					RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
     48 	src->File = RC_FILE_TEMPORARY;
     49 	src->Index = fc_state->PredicateReg;
     50 }
     51 
     52 static void build_pred_dst(
     53 	struct rc_dst_register * dst,
     54 	struct vert_fc_state * fc_state)
     55 {
     56 	dst->WriteMask = RC_MASK_W;
     57 	dst->File = RC_FILE_TEMPORARY;
     58 	dst->Index = fc_state->PredicateReg;
     59 }
     60 
     61 static void mark_write(void * userdata,	struct rc_instruction * inst,
     62 		rc_register_file file,	unsigned int index, unsigned int mask)
     63 {
     64 	unsigned int * writemasks = userdata;
     65 
     66 	if (file != RC_FILE_TEMPORARY)
     67 		return;
     68 
     69 	if (index >= R300_VS_MAX_TEMPS)
     70 		return;
     71 
     72 	writemasks[index] |= mask;
     73 }
     74 
     75 static int reserve_predicate_reg(struct vert_fc_state * fc_state)
     76 {
     77 	int i;
     78 	unsigned int writemasks[RC_REGISTER_MAX_INDEX];
     79 	struct rc_instruction * inst;
     80 	memset(writemasks, 0, sizeof(writemasks));
     81 	for(inst = fc_state->C->Program.Instructions.Next;
     82 				inst != &fc_state->C->Program.Instructions;
     83 				inst = inst->Next) {
     84 		rc_for_all_writes_mask(inst, mark_write, writemasks);
     85 	}
     86 
     87 	for(i = 0; i < fc_state->C->max_temp_regs; i++) {
     88 		/* Most of the control flow instructions only write the
     89 		 * W component of the Predicate Register, but
     90 		 * the docs say that ME_PRED_SET_CLR and
     91 		 * ME_PRED_SET_RESTORE write all components of the
     92 		 * register, so we must reserve a register that has
     93 		 * all its components free. */
     94 		if (!writemasks[i]) {
     95 			fc_state->PredicateReg = i;
     96 			break;
     97 		}
     98 	}
     99 	if (i == fc_state->C->max_temp_regs) {
    100 		rc_error(fc_state->C, "No free temporary to use for"
    101 				" predicate stack counter.\n");
    102 		return -1;
    103 	}
    104 	return 1;
    105 }
    106 
    107 static void lower_bgnloop(
    108 	struct rc_instruction * inst,
    109 	struct vert_fc_state * fc_state)
    110 {
    111 	struct rc_instruction * new_inst =
    112 			rc_insert_new_instruction(fc_state->C, inst->Prev);
    113 
    114 	if ((!fc_state->C->is_r500
    115 		&& fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH)
    116 	     || fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) {
    117 		rc_error(fc_state->C, "Loops are nested too deep.");
    118 		return;
    119 	}
    120 
    121 	if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
    122 		if (fc_state->PredicateReg == -1) {
    123 			if (reserve_predicate_reg(fc_state) == -1) {
    124 				return;
    125 			}
    126 		}
    127 
    128 		/* Initialize the predicate bit to true. */
    129 		new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
    130 		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
    131 		new_inst->U.I.SrcReg[0].Index = 0;
    132 		new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
    133 		new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
    134 	} else {
    135 		fc_state->PredStack[fc_state->LoopDepth] =
    136 						fc_state->PredicateReg;
    137 		/* Copy the current predicate value to this loop's
    138 		 * predicate register */
    139 
    140 		/* Use the old predicate value for src0 */
    141 		build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
    142 
    143 		/* Reserve this loop's predicate register */
    144 		if (reserve_predicate_reg(fc_state) == -1) {
    145 			return;
    146 		}
    147 
    148 		/* Copy the old predicate value to the new register */
    149 		new_inst->U.I.Opcode = RC_OPCODE_ADD;
    150 		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
    151 		new_inst->U.I.SrcReg[1].Index = 0;
    152 		new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
    153 		new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
    154 	}
    155 
    156 }
    157 
    158 static void lower_brk(
    159 	struct rc_instruction * inst,
    160 	struct vert_fc_state * fc_state)
    161 {
    162 	if (fc_state->LoopDepth == 1) {
    163 		inst->U.I.Opcode = RC_OPCODE_RCP;
    164 		inst->U.I.DstReg.Pred = RC_PRED_INV;
    165 		inst->U.I.SrcReg[0].Index = 0;
    166 		inst->U.I.SrcReg[0].File = RC_FILE_NONE;
    167 		inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
    168 	} else {
    169 		inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
    170 		inst->U.I.DstReg.Pred = RC_PRED_SET;
    171 	}
    172 
    173 	build_pred_dst(&inst->U.I.DstReg, fc_state);
    174 }
    175 
    176 static void lower_endloop(
    177 	struct rc_instruction * inst,
    178 	struct vert_fc_state * fc_state)
    179 {
    180 	struct rc_instruction * new_inst =
    181 			rc_insert_new_instruction(fc_state->C, inst);
    182 
    183 	new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE;
    184 	build_pred_dst(&new_inst->U.I.DstReg, fc_state);
    185 	/* Restore the previous predicate register. */
    186 	fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1];
    187 	build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
    188 }
    189 
    190 static void lower_if(
    191 	struct rc_instruction * inst,
    192 	struct vert_fc_state * fc_state)
    193 {
    194 	/* Reserve a temporary to use as our predicate stack counter, if we
    195 	 * don't already have one. */
    196 	if (fc_state->PredicateReg == -1) {
    197 		/* If we are inside a loop, the Predicate Register should
    198 		 * have already been defined. */
    199 		assert(fc_state->LoopDepth == 0);
    200 
    201 		if (reserve_predicate_reg(fc_state) == -1) {
    202 			return;
    203 		}
    204 	}
    205 
    206 	if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) {
    207 		fc_state->InCFBreak = 1;
    208 	}
    209 	if ((fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0)
    210 			|| (fc_state->LoopDepth == 1 && fc_state->InCFBreak)) {
    211 		if (fc_state->InCFBreak) {
    212 			inst->U.I.Opcode = RC_ME_PRED_SEQ;
    213 			inst->U.I.DstReg.Pred = RC_PRED_SET;
    214 		} else {
    215 			inst->U.I.Opcode = RC_ME_PRED_SNEQ;
    216 		}
    217 	} else {
    218 		unsigned swz;
    219 		inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
    220 		memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0],
    221 						sizeof(inst->U.I.SrcReg[1]));
    222 		swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
    223 		/* VE_PRED_SNEQ_PUSH needs to the branch condition to be in the
    224 		 * w component */
    225 		inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED,
    226 				RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz);
    227 		build_pred_src(&inst->U.I.SrcReg[0], fc_state);
    228 	}
    229 	build_pred_dst(&inst->U.I.DstReg, fc_state);
    230 }
    231 
    232 void rc_vert_fc(struct radeon_compiler *c, void *user)
    233 {
    234 	struct rc_instruction * inst;
    235 	struct vert_fc_state fc_state;
    236 
    237 	memset(&fc_state, 0, sizeof(fc_state));
    238 	fc_state.PredicateReg = -1;
    239 	fc_state.C = c;
    240 
    241 	for(inst = c->Program.Instructions.Next;
    242 					inst != &c->Program.Instructions;
    243 					inst = inst->Next) {
    244 
    245 		switch (inst->U.I.Opcode) {
    246 
    247 		case RC_OPCODE_BGNLOOP:
    248 			lower_bgnloop(inst, &fc_state);
    249 			fc_state.LoopDepth++;
    250 			break;
    251 
    252 		case RC_OPCODE_BRK:
    253 			lower_brk(inst, &fc_state);
    254 			break;
    255 
    256 		case RC_OPCODE_ENDLOOP:
    257 			if (fc_state.BranchDepth != 0
    258 					|| fc_state.LoopDepth != 1) {
    259 				lower_endloop(inst, &fc_state);
    260 			}
    261 			fc_state.LoopDepth--;
    262 			/* Skip PRED_RESTORE */
    263 			inst = inst->Next;
    264 			break;
    265 		case RC_OPCODE_IF:
    266 			lower_if(inst, &fc_state);
    267 			fc_state.BranchDepth++;
    268 			break;
    269 
    270 		case RC_OPCODE_ELSE:
    271 			inst->U.I.Opcode = RC_ME_PRED_SET_INV;
    272 			build_pred_dst(&inst->U.I.DstReg, &fc_state);
    273 			build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
    274 			break;
    275 
    276 		case RC_OPCODE_ENDIF:
    277 			if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) {
    278 				struct rc_instruction * to_delete = inst;
    279 				inst = inst->Prev;
    280 				rc_remove_instruction(to_delete);
    281 				/* XXX: Delete the endif instruction */
    282 			} else {
    283 				inst->U.I.Opcode = RC_ME_PRED_SET_POP;
    284 				build_pred_dst(&inst->U.I.DstReg, &fc_state);
    285 				build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
    286 			}
    287 			fc_state.InCFBreak = 0;
    288 			fc_state.BranchDepth--;
    289 			break;
    290 
    291 		default:
    292 			if (fc_state.BranchDepth || fc_state.LoopDepth) {
    293 				inst->U.I.DstReg.Pred = RC_PRED_SET;
    294 			}
    295 			break;
    296 		}
    297 
    298 		if (c->Error) {
    299 			return;
    300 		}
    301 	}
    302 }
    303