1 /* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Author: Tom Stellard <thomas.stellard (at) amd.com> 24 */ 25 26 #include "radeon_compiler.h" 27 #include "radeon_compiler_util.h" 28 #include "radeon_dataflow.h" 29 #include "radeon_program.h" 30 #include "radeon_program_constants.h" 31 32 struct vert_fc_state { 33 struct radeon_compiler *C; 34 unsigned BranchDepth; 35 unsigned LoopDepth; 36 unsigned LoopsReserved; 37 int PredStack[R500_PVS_MAX_LOOP_DEPTH]; 38 int PredicateReg; 39 unsigned InCFBreak; 40 }; 41 42 static void build_pred_src( 43 struct rc_src_register * src, 44 struct vert_fc_state * fc_state) 45 { 46 src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, 47 RC_SWIZZLE_UNUSED, RC_SWIZZLE_W); 48 src->File = RC_FILE_TEMPORARY; 49 src->Index = fc_state->PredicateReg; 50 } 51 52 static void build_pred_dst( 53 struct rc_dst_register * dst, 54 struct vert_fc_state * fc_state) 55 { 56 dst->WriteMask = RC_MASK_W; 57 dst->File = RC_FILE_TEMPORARY; 58 dst->Index = fc_state->PredicateReg; 59 } 60 61 static void mark_write(void * userdata, struct rc_instruction * inst, 62 rc_register_file file, unsigned int index, unsigned int mask) 63 { 64 unsigned int * writemasks = userdata; 65 66 if (file != RC_FILE_TEMPORARY) 67 return; 68 69 if (index >= R300_VS_MAX_TEMPS) 70 return; 71 72 writemasks[index] |= mask; 73 } 74 75 static int reserve_predicate_reg(struct vert_fc_state * fc_state) 76 { 77 int i; 78 unsigned int writemasks[RC_REGISTER_MAX_INDEX]; 79 struct rc_instruction * inst; 80 memset(writemasks, 0, sizeof(writemasks)); 81 for(inst = fc_state->C->Program.Instructions.Next; 82 inst != &fc_state->C->Program.Instructions; 83 inst = inst->Next) { 84 rc_for_all_writes_mask(inst, mark_write, writemasks); 85 } 86 87 for(i = 0; i < fc_state->C->max_temp_regs; i++) { 88 /* Most of the control flow instructions only write the 89 * W component of the Predicate Register, but 90 * the docs say that ME_PRED_SET_CLR and 91 * ME_PRED_SET_RESTORE write all components of the 92 * register, so we must reserve a register that has 93 * all its components free. */ 94 if (!writemasks[i]) { 95 fc_state->PredicateReg = i; 96 break; 97 } 98 } 99 if (i == fc_state->C->max_temp_regs) { 100 rc_error(fc_state->C, "No free temporary to use for" 101 " predicate stack counter.\n"); 102 return -1; 103 } 104 return 1; 105 } 106 107 static void lower_bgnloop( 108 struct rc_instruction * inst, 109 struct vert_fc_state * fc_state) 110 { 111 struct rc_instruction * new_inst = 112 rc_insert_new_instruction(fc_state->C, inst->Prev); 113 114 if ((!fc_state->C->is_r500 115 && fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH) 116 || fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) { 117 rc_error(fc_state->C, "Loops are nested too deep."); 118 return; 119 } 120 121 if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) { 122 if (fc_state->PredicateReg == -1) { 123 if (reserve_predicate_reg(fc_state) == -1) { 124 return; 125 } 126 } 127 128 /* Initialize the predicate bit to true. */ 129 new_inst->U.I.Opcode = RC_ME_PRED_SEQ; 130 build_pred_dst(&new_inst->U.I.DstReg, fc_state); 131 new_inst->U.I.SrcReg[0].Index = 0; 132 new_inst->U.I.SrcReg[0].File = RC_FILE_NONE; 133 new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 134 } else { 135 fc_state->PredStack[fc_state->LoopDepth] = 136 fc_state->PredicateReg; 137 /* Copy the current predicate value to this loop's 138 * predicate register */ 139 140 /* Use the old predicate value for src0 */ 141 build_pred_src(&new_inst->U.I.SrcReg[0], fc_state); 142 143 /* Reserve this loop's predicate register */ 144 if (reserve_predicate_reg(fc_state) == -1) { 145 return; 146 } 147 148 /* Copy the old predicate value to the new register */ 149 new_inst->U.I.Opcode = RC_OPCODE_ADD; 150 build_pred_dst(&new_inst->U.I.DstReg, fc_state); 151 new_inst->U.I.SrcReg[1].Index = 0; 152 new_inst->U.I.SrcReg[1].File = RC_FILE_NONE; 153 new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000; 154 } 155 156 } 157 158 static void lower_brk( 159 struct rc_instruction * inst, 160 struct vert_fc_state * fc_state) 161 { 162 if (fc_state->LoopDepth == 1) { 163 inst->U.I.Opcode = RC_OPCODE_RCP; 164 inst->U.I.DstReg.Pred = RC_PRED_INV; 165 inst->U.I.SrcReg[0].Index = 0; 166 inst->U.I.SrcReg[0].File = RC_FILE_NONE; 167 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 168 } else { 169 inst->U.I.Opcode = RC_ME_PRED_SET_CLR; 170 inst->U.I.DstReg.Pred = RC_PRED_SET; 171 } 172 173 build_pred_dst(&inst->U.I.DstReg, fc_state); 174 } 175 176 static void lower_endloop( 177 struct rc_instruction * inst, 178 struct vert_fc_state * fc_state) 179 { 180 struct rc_instruction * new_inst = 181 rc_insert_new_instruction(fc_state->C, inst); 182 183 new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE; 184 build_pred_dst(&new_inst->U.I.DstReg, fc_state); 185 /* Restore the previous predicate register. */ 186 fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1]; 187 build_pred_src(&new_inst->U.I.SrcReg[0], fc_state); 188 } 189 190 static void lower_if( 191 struct rc_instruction * inst, 192 struct vert_fc_state * fc_state) 193 { 194 /* Reserve a temporary to use as our predicate stack counter, if we 195 * don't already have one. */ 196 if (fc_state->PredicateReg == -1) { 197 /* If we are inside a loop, the Predicate Register should 198 * have already been defined. */ 199 assert(fc_state->LoopDepth == 0); 200 201 if (reserve_predicate_reg(fc_state) == -1) { 202 return; 203 } 204 } 205 206 if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) { 207 fc_state->InCFBreak = 1; 208 } 209 if ((fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0) 210 || (fc_state->LoopDepth == 1 && fc_state->InCFBreak)) { 211 if (fc_state->InCFBreak) { 212 inst->U.I.Opcode = RC_ME_PRED_SEQ; 213 inst->U.I.DstReg.Pred = RC_PRED_SET; 214 } else { 215 inst->U.I.Opcode = RC_ME_PRED_SNEQ; 216 } 217 } else { 218 unsigned swz; 219 inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH; 220 memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0], 221 sizeof(inst->U.I.SrcReg[1])); 222 swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle); 223 /* VE_PRED_SNEQ_PUSH needs to the branch condition to be in the 224 * w component */ 225 inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, 226 RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz); 227 build_pred_src(&inst->U.I.SrcReg[0], fc_state); 228 } 229 build_pred_dst(&inst->U.I.DstReg, fc_state); 230 } 231 232 void rc_vert_fc(struct radeon_compiler *c, void *user) 233 { 234 struct rc_instruction * inst; 235 struct vert_fc_state fc_state; 236 237 memset(&fc_state, 0, sizeof(fc_state)); 238 fc_state.PredicateReg = -1; 239 fc_state.C = c; 240 241 for(inst = c->Program.Instructions.Next; 242 inst != &c->Program.Instructions; 243 inst = inst->Next) { 244 245 switch (inst->U.I.Opcode) { 246 247 case RC_OPCODE_BGNLOOP: 248 lower_bgnloop(inst, &fc_state); 249 fc_state.LoopDepth++; 250 break; 251 252 case RC_OPCODE_BRK: 253 lower_brk(inst, &fc_state); 254 break; 255 256 case RC_OPCODE_ENDLOOP: 257 if (fc_state.BranchDepth != 0 258 || fc_state.LoopDepth != 1) { 259 lower_endloop(inst, &fc_state); 260 } 261 fc_state.LoopDepth--; 262 /* Skip PRED_RESTORE */ 263 inst = inst->Next; 264 break; 265 case RC_OPCODE_IF: 266 lower_if(inst, &fc_state); 267 fc_state.BranchDepth++; 268 break; 269 270 case RC_OPCODE_ELSE: 271 inst->U.I.Opcode = RC_ME_PRED_SET_INV; 272 build_pred_dst(&inst->U.I.DstReg, &fc_state); 273 build_pred_src(&inst->U.I.SrcReg[0], &fc_state); 274 break; 275 276 case RC_OPCODE_ENDIF: 277 if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) { 278 struct rc_instruction * to_delete = inst; 279 inst = inst->Prev; 280 rc_remove_instruction(to_delete); 281 /* XXX: Delete the endif instruction */ 282 } else { 283 inst->U.I.Opcode = RC_ME_PRED_SET_POP; 284 build_pred_dst(&inst->U.I.DstReg, &fc_state); 285 build_pred_src(&inst->U.I.SrcReg[0], &fc_state); 286 } 287 fc_state.InCFBreak = 0; 288 fc_state.BranchDepth--; 289 break; 290 291 default: 292 if (fc_state.BranchDepth || fc_state.LoopDepth) { 293 inst->U.I.DstReg.Pred = RC_PRED_SET; 294 } 295 break; 296 } 297 298 if (c->Error) { 299 return; 300 } 301 } 302 } 303