Home | History | Annotate | Download | only in compiler
      1 /*
      2  * Copyright (C) 2009 Nicolai Haehnle.
      3  *
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining
      7  * a copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sublicense, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial
     16  * portions of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  */
     27 
     28 #include "radeon_dataflow.h"
     29 
     30 #include "radeon_compiler.h"
     31 
     32 
     33 struct updatemask_state {
     34 	unsigned char Output[RC_REGISTER_MAX_INDEX];
     35 	unsigned char Temporary[RC_REGISTER_MAX_INDEX];
     36 	unsigned char Address;
     37 	unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
     38 };
     39 
     40 struct instruction_state {
     41 	unsigned char WriteMask:4;
     42 	unsigned char WriteALUResult:1;
     43 	unsigned char SrcReg[3];
     44 };
     45 
     46 struct loopinfo {
     47 	struct updatemask_state * Breaks;
     48 	unsigned int BreakCount;
     49 	unsigned int BreaksReserved;
     50 };
     51 
     52 struct branchinfo {
     53 	unsigned int HaveElse:1;
     54 
     55 	struct updatemask_state StoreEndif;
     56 	struct updatemask_state StoreElse;
     57 };
     58 
     59 struct deadcode_state {
     60 	struct radeon_compiler * C;
     61 	struct instruction_state * Instructions;
     62 
     63 	struct updatemask_state R;
     64 
     65 	struct branchinfo * BranchStack;
     66 	unsigned int BranchStackSize;
     67 	unsigned int BranchStackReserved;
     68 
     69 	struct loopinfo * LoopStack;
     70 	unsigned int LoopStackSize;
     71 	unsigned int LoopStackReserved;
     72 };
     73 
     74 
     75 static void or_updatemasks(
     76 	struct updatemask_state * dst,
     77 	struct updatemask_state * a,
     78 	struct updatemask_state * b)
     79 {
     80 	for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
     81 		dst->Output[i] = a->Output[i] | b->Output[i];
     82 		dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
     83 	}
     84 
     85 	for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
     86 		dst->Special[i] = a->Special[i] | b->Special[i];
     87 
     88 	dst->Address = a->Address | b->Address;
     89 }
     90 
     91 static void push_break(struct deadcode_state *s)
     92 {
     93 	struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1];
     94 	memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
     95 		loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);
     96 
     97 	memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
     98 }
     99 
    100 static void push_loop(struct deadcode_state * s)
    101 {
    102 	memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
    103 			s->LoopStackSize, s->LoopStackReserved, 1);
    104 	memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
    105 }
    106 
    107 static void push_branch(struct deadcode_state * s)
    108 {
    109 	struct branchinfo * branch;
    110 
    111 	memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
    112 			s->BranchStackSize, s->BranchStackReserved, 1);
    113 
    114 	branch = &s->BranchStack[s->BranchStackSize++];
    115 	branch->HaveElse = 0;
    116 	memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
    117 }
    118 
    119 static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
    120 {
    121 	if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
    122 		if (index >= RC_REGISTER_MAX_INDEX) {
    123 			rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
    124 			return 0;
    125 		}
    126 
    127 		if (file == RC_FILE_OUTPUT)
    128 			return &s->R.Output[index];
    129 		else
    130 			return &s->R.Temporary[index];
    131 	} else if (file == RC_FILE_ADDRESS) {
    132 		return &s->R.Address;
    133 	} else if (file == RC_FILE_SPECIAL) {
    134 		if (index >= RC_NUM_SPECIAL_REGISTERS) {
    135 			rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
    136 			return 0;
    137 		}
    138 
    139 		return &s->R.Special[index];
    140 	}
    141 
    142 	return 0;
    143 }
    144 
    145 static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
    146 {
    147 	unsigned char * pused = get_used_ptr(s, file, index);
    148 	if (pused)
    149 		*pused |= mask;
    150 }
    151 
    152 static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
    153 {
    154 	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
    155 	struct instruction_state * insts = &s->Instructions[inst->IP];
    156 	unsigned int usedmask = 0;
    157 	unsigned int srcmasks[3];
    158 
    159 	if (opcode->HasDstReg) {
    160 		unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
    161 		if (pused) {
    162 			usedmask = *pused & inst->U.I.DstReg.WriteMask;
    163 			*pused &= ~usedmask;
    164 		}
    165 	}
    166 
    167 	insts->WriteMask |= usedmask;
    168 
    169 	if (inst->U.I.WriteALUResult) {
    170 		unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
    171 		if (pused && *pused) {
    172 			if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
    173 				usedmask |= RC_MASK_X;
    174 			else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
    175 				usedmask |= RC_MASK_W;
    176 
    177 			*pused = 0;
    178 			insts->WriteALUResult = 1;
    179 		}
    180 	}
    181 
    182 	rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
    183 
    184 	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
    185 		unsigned int refmask = 0;
    186 		unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
    187 		insts->SrcReg[src] |= newsrcmask;
    188 
    189 		for(unsigned int chan = 0; chan < 4; ++chan) {
    190 			if (GET_BIT(newsrcmask, chan))
    191 				refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
    192 		}
    193 
    194 		/* get rid of spurious bits from ZERO, ONE, etc. swizzles */
    195 		refmask &= RC_MASK_XYZW;
    196 
    197 		if (!refmask)
    198 			continue;
    199 
    200 		mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
    201 
    202 		if (inst->U.I.SrcReg[src].RelAddr)
    203 			mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
    204 	}
    205 }
    206 
    207 static void mark_output_use(void * data, unsigned int index, unsigned int mask)
    208 {
    209 	struct deadcode_state * s = data;
    210 
    211 	mark_used(s, RC_FILE_OUTPUT, index, mask);
    212 }
    213 
    214 void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
    215 {
    216 	struct deadcode_state s;
    217 	unsigned int nr_instructions;
    218 	rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;
    219 	unsigned int ip;
    220 
    221 	memset(&s, 0, sizeof(s));
    222 	s.C = c;
    223 
    224 	nr_instructions = rc_recompute_ips(c);
    225 	s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
    226 	memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
    227 
    228 	dce(c, &s, &mark_output_use);
    229 
    230 	for(struct rc_instruction * inst = c->Program.Instructions.Prev;
    231 	    inst != &c->Program.Instructions;
    232 	    inst = inst->Prev) {
    233 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
    234 
    235 		switch(opcode->Opcode){
    236 		/* Mark all sources in the loop body as used before doing
    237 		 * normal deadcode analysis.  This is probably not optimal.
    238 		 */
    239 		case RC_OPCODE_ENDLOOP:
    240 		{
    241 			int endloops = 1;
    242 			struct rc_instruction *ptr;
    243 			for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
    244 				opcode = rc_get_opcode_info(ptr->U.I.Opcode);
    245 				if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
    246 					endloops--;
    247 					continue;
    248 				}
    249 				if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
    250 					endloops++;
    251 					continue;
    252 				}
    253 				if(opcode->HasDstReg){
    254 					int src = 0;
    255 					unsigned int srcmasks[3];
    256 					rc_compute_sources_for_writemask(ptr,
    257 						ptr->U.I.DstReg.WriteMask, srcmasks);
    258 					for(src=0; src < opcode->NumSrcRegs; src++){
    259 						mark_used(&s,
    260 							ptr->U.I.SrcReg[src].File,
    261 							ptr->U.I.SrcReg[src].Index,
    262 							srcmasks[src]);
    263 					}
    264 				}
    265 			}
    266 			push_loop(&s);
    267 			break;
    268 		}
    269 		case RC_OPCODE_BRK:
    270 			push_break(&s);
    271 			break;
    272 		case RC_OPCODE_BGNLOOP:
    273 		{
    274 			unsigned int i;
    275 			struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
    276 			for(i = 0; i < loop->BreakCount; i++) {
    277 				or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
    278 			}
    279 			break;
    280 		}
    281 		case RC_OPCODE_CONT:
    282 			break;
    283 		case RC_OPCODE_ENDIF:
    284 			push_branch(&s);
    285 			break;
    286 		default:
    287 			if (opcode->IsFlowControl && s.BranchStackSize) {
    288 				struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
    289 				if (opcode->Opcode == RC_OPCODE_IF) {
    290 					or_updatemasks(&s.R,
    291 							&s.R,
    292 							branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
    293 
    294 					s.BranchStackSize--;
    295 				} else if (opcode->Opcode == RC_OPCODE_ELSE) {
    296 					if (branch->HaveElse) {
    297 						rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
    298 					} else {
    299 						memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
    300 						memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
    301 						branch->HaveElse = 1;
    302 					}
    303 				} else {
    304 					rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
    305 				}
    306 			}
    307 		}
    308 
    309 		update_instruction(&s, inst);
    310 	}
    311 
    312 	ip = 0;
    313 	for(struct rc_instruction * inst = c->Program.Instructions.Next;
    314 	    inst != &c->Program.Instructions;
    315 	    inst = inst->Next, ++ip) {
    316 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
    317 		int dead = 1;
    318 		unsigned int srcmasks[3];
    319 		unsigned int usemask;
    320 
    321 		if (!opcode->HasDstReg) {
    322 			dead = 0;
    323 		} else {
    324 			inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
    325 			if (s.Instructions[ip].WriteMask)
    326 				dead = 0;
    327 
    328 			if (s.Instructions[ip].WriteALUResult)
    329 				dead = 0;
    330 			else
    331 				inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
    332 		}
    333 
    334 		if (dead) {
    335 			struct rc_instruction * todelete = inst;
    336 			inst = inst->Prev;
    337 			rc_remove_instruction(todelete);
    338 			continue;
    339 		}
    340 
    341 		usemask = s.Instructions[ip].WriteMask;
    342 
    343 		if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
    344 			usemask |= RC_MASK_X;
    345 		else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
    346 			usemask |= RC_MASK_W;
    347 
    348 		rc_compute_sources_for_writemask(inst, usemask, srcmasks);
    349 
    350 		for(unsigned int src = 0; src < 3; ++src) {
    351 			for(unsigned int chan = 0; chan < 4; ++chan) {
    352 				if (!GET_BIT(srcmasks[src], chan))
    353 					SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
    354 			}
    355 		}
    356 	}
    357 
    358 	rc_calculate_inputs_outputs(c);
    359 }
    360