1 /* 2 * Copyright (C) 2009 Nicolai Haehnle. 3 * 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial 16 * portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 */ 27 28 #include "radeon_dataflow.h" 29 30 #include "radeon_compiler.h" 31 32 33 struct updatemask_state { 34 unsigned char Output[RC_REGISTER_MAX_INDEX]; 35 unsigned char Temporary[RC_REGISTER_MAX_INDEX]; 36 unsigned char Address; 37 unsigned char Special[RC_NUM_SPECIAL_REGISTERS]; 38 }; 39 40 struct instruction_state { 41 unsigned char WriteMask:4; 42 unsigned char WriteALUResult:1; 43 unsigned char SrcReg[3]; 44 }; 45 46 struct loopinfo { 47 struct updatemask_state * Breaks; 48 unsigned int BreakCount; 49 unsigned int BreaksReserved; 50 }; 51 52 struct branchinfo { 53 unsigned int HaveElse:1; 54 55 struct updatemask_state StoreEndif; 56 struct updatemask_state StoreElse; 57 }; 58 59 struct deadcode_state { 60 struct radeon_compiler * C; 61 struct instruction_state * Instructions; 62 63 struct updatemask_state R; 64 65 struct branchinfo * BranchStack; 66 unsigned int BranchStackSize; 67 unsigned int BranchStackReserved; 68 69 struct loopinfo * LoopStack; 70 unsigned int LoopStackSize; 71 unsigned int LoopStackReserved; 72 }; 73 74 75 static void or_updatemasks( 76 struct updatemask_state * dst, 77 struct updatemask_state * a, 78 struct updatemask_state * b) 79 { 80 for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { 81 dst->Output[i] = a->Output[i] | b->Output[i]; 82 dst->Temporary[i] = a->Temporary[i] | b->Temporary[i]; 83 } 84 85 for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i) 86 dst->Special[i] = a->Special[i] | b->Special[i]; 87 88 dst->Address = a->Address | b->Address; 89 } 90 91 static void push_break(struct deadcode_state *s) 92 { 93 struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1]; 94 memory_pool_array_reserve(&s->C->Pool, struct updatemask_state, 95 loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1); 96 97 memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R)); 98 } 99 100 static void push_loop(struct deadcode_state * s) 101 { 102 memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, 103 s->LoopStackSize, s->LoopStackReserved, 1); 104 memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo)); 105 } 106 107 static void push_branch(struct deadcode_state * s) 108 { 109 struct branchinfo * branch; 110 111 memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, 112 s->BranchStackSize, s->BranchStackReserved, 1); 113 114 branch = &s->BranchStack[s->BranchStackSize++]; 115 branch->HaveElse = 0; 116 memcpy(&branch->StoreEndif, &s->R, sizeof(s->R)); 117 } 118 119 static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index) 120 { 121 if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) { 122 if (index >= RC_REGISTER_MAX_INDEX) { 123 rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file); 124 return 0; 125 } 126 127 if (file == RC_FILE_OUTPUT) 128 return &s->R.Output[index]; 129 else 130 return &s->R.Temporary[index]; 131 } else if (file == RC_FILE_ADDRESS) { 132 return &s->R.Address; 133 } else if (file == RC_FILE_SPECIAL) { 134 if (index >= RC_NUM_SPECIAL_REGISTERS) { 135 rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index); 136 return 0; 137 } 138 139 return &s->R.Special[index]; 140 } 141 142 return 0; 143 } 144 145 static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask) 146 { 147 unsigned char * pused = get_used_ptr(s, file, index); 148 if (pused) 149 *pused |= mask; 150 } 151 152 static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst) 153 { 154 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 155 struct instruction_state * insts = &s->Instructions[inst->IP]; 156 unsigned int usedmask = 0; 157 unsigned int srcmasks[3]; 158 159 if (opcode->HasDstReg) { 160 unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); 161 if (pused) { 162 usedmask = *pused & inst->U.I.DstReg.WriteMask; 163 *pused &= ~usedmask; 164 } 165 } 166 167 insts->WriteMask |= usedmask; 168 169 if (inst->U.I.WriteALUResult) { 170 unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT); 171 if (pused && *pused) { 172 if (inst->U.I.WriteALUResult == RC_ALURESULT_X) 173 usedmask |= RC_MASK_X; 174 else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) 175 usedmask |= RC_MASK_W; 176 177 *pused = 0; 178 insts->WriteALUResult = 1; 179 } 180 } 181 182 rc_compute_sources_for_writemask(inst, usedmask, srcmasks); 183 184 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { 185 unsigned int refmask = 0; 186 unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src]; 187 insts->SrcReg[src] |= newsrcmask; 188 189 for(unsigned int chan = 0; chan < 4; ++chan) { 190 if (GET_BIT(newsrcmask, chan)) 191 refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); 192 } 193 194 /* get rid of spurious bits from ZERO, ONE, etc. swizzles */ 195 refmask &= RC_MASK_XYZW; 196 197 if (!refmask) 198 continue; 199 200 mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask); 201 202 if (inst->U.I.SrcReg[src].RelAddr) 203 mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); 204 } 205 } 206 207 static void mark_output_use(void * data, unsigned int index, unsigned int mask) 208 { 209 struct deadcode_state * s = data; 210 211 mark_used(s, RC_FILE_OUTPUT, index, mask); 212 } 213 214 void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) 215 { 216 struct deadcode_state s; 217 unsigned int nr_instructions; 218 rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user; 219 unsigned int ip; 220 221 memset(&s, 0, sizeof(s)); 222 s.C = c; 223 224 nr_instructions = rc_recompute_ips(c); 225 s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions); 226 memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions); 227 228 dce(c, &s, &mark_output_use); 229 230 for(struct rc_instruction * inst = c->Program.Instructions.Prev; 231 inst != &c->Program.Instructions; 232 inst = inst->Prev) { 233 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 234 235 switch(opcode->Opcode){ 236 /* Mark all sources in the loop body as used before doing 237 * normal deadcode analysis. This is probably not optimal. 238 */ 239 case RC_OPCODE_ENDLOOP: 240 { 241 int endloops = 1; 242 struct rc_instruction *ptr; 243 for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){ 244 opcode = rc_get_opcode_info(ptr->U.I.Opcode); 245 if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ 246 endloops--; 247 continue; 248 } 249 if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){ 250 endloops++; 251 continue; 252 } 253 if(opcode->HasDstReg){ 254 int src = 0; 255 unsigned int srcmasks[3]; 256 rc_compute_sources_for_writemask(ptr, 257 ptr->U.I.DstReg.WriteMask, srcmasks); 258 for(src=0; src < opcode->NumSrcRegs; src++){ 259 mark_used(&s, 260 ptr->U.I.SrcReg[src].File, 261 ptr->U.I.SrcReg[src].Index, 262 srcmasks[src]); 263 } 264 } 265 } 266 push_loop(&s); 267 break; 268 } 269 case RC_OPCODE_BRK: 270 push_break(&s); 271 break; 272 case RC_OPCODE_BGNLOOP: 273 { 274 unsigned int i; 275 struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1]; 276 for(i = 0; i < loop->BreakCount; i++) { 277 or_updatemasks(&s.R, &s.R, &loop->Breaks[i]); 278 } 279 break; 280 } 281 case RC_OPCODE_CONT: 282 break; 283 case RC_OPCODE_ENDIF: 284 push_branch(&s); 285 break; 286 default: 287 if (opcode->IsFlowControl && s.BranchStackSize) { 288 struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; 289 if (opcode->Opcode == RC_OPCODE_IF) { 290 or_updatemasks(&s.R, 291 &s.R, 292 branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif); 293 294 s.BranchStackSize--; 295 } else if (opcode->Opcode == RC_OPCODE_ELSE) { 296 if (branch->HaveElse) { 297 rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__); 298 } else { 299 memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); 300 memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); 301 branch->HaveElse = 1; 302 } 303 } else { 304 rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); 305 } 306 } 307 } 308 309 update_instruction(&s, inst); 310 } 311 312 ip = 0; 313 for(struct rc_instruction * inst = c->Program.Instructions.Next; 314 inst != &c->Program.Instructions; 315 inst = inst->Next, ++ip) { 316 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 317 int dead = 1; 318 unsigned int srcmasks[3]; 319 unsigned int usemask; 320 321 if (!opcode->HasDstReg) { 322 dead = 0; 323 } else { 324 inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask; 325 if (s.Instructions[ip].WriteMask) 326 dead = 0; 327 328 if (s.Instructions[ip].WriteALUResult) 329 dead = 0; 330 else 331 inst->U.I.WriteALUResult = RC_ALURESULT_NONE; 332 } 333 334 if (dead) { 335 struct rc_instruction * todelete = inst; 336 inst = inst->Prev; 337 rc_remove_instruction(todelete); 338 continue; 339 } 340 341 usemask = s.Instructions[ip].WriteMask; 342 343 if (inst->U.I.WriteALUResult == RC_ALURESULT_X) 344 usemask |= RC_MASK_X; 345 else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) 346 usemask |= RC_MASK_W; 347 348 rc_compute_sources_for_writemask(inst, usemask, srcmasks); 349 350 for(unsigned int src = 0; src < 3; ++src) { 351 for(unsigned int chan = 0; chan < 4; ++chan) { 352 if (!GET_BIT(srcmasks[src], chan)) 353 SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); 354 } 355 } 356 } 357 358 rc_calculate_inputs_outputs(c); 359 } 360