Home | History | Annotate | Download | only in compiler
      1 /*
      2  * Copyright 2010 Tom Stellard <tstellar (at) gmail.com>
      3  *
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining
      7  * a copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sublicense, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial
     16  * portions of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  */
     27 
     28 /**
     29  * \file
     30  */
     31 
     32 #include <stdio.h>
     33 #include "c99_math.h"
     34 #include "radeon_emulate_loops.h"
     35 #include "radeon_compiler.h"
     36 #include "radeon_compiler_util.h"
     37 #include "radeon_dataflow.h"
     38 
     39 #define VERBOSE 0
     40 
     41 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
     42 
     43 struct const_value {
     44 	struct radeon_compiler * C;
     45 	struct rc_src_register * Src;
     46 	float Value;
     47 	int HasValue;
     48 };
     49 
     50 struct count_inst {
     51 	struct radeon_compiler * C;
     52 	int Index;
     53 	rc_swizzle Swz;
     54 	float Amount;
     55 	int Unknown;
     56 	unsigned BranchDepth;
     57 };
     58 
     59 static unsigned int loop_max_possible_iterations(struct radeon_compiler *c,
     60 			struct loop_info * loop)
     61 {
     62 	unsigned int total_i = rc_recompute_ips(c);
     63 	unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;
     64 	/* +1 because the program already has one iteration of the loop. */
     65 	return 1 + ((c->max_alu_insts - total_i) / loop_i);
     66 }
     67 
     68 static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
     69 						unsigned int iterations)
     70 {
     71 	unsigned int i;
     72 	struct rc_instruction * ptr;
     73 	struct rc_instruction * first = loop->BeginLoop->Next;
     74 	struct rc_instruction * last = loop->EndLoop->Prev;
     75 	struct rc_instruction * append_to = last;
     76 	rc_remove_instruction(loop->BeginLoop);
     77 	rc_remove_instruction(loop->EndLoop);
     78 	for( i = 1; i < iterations; i++){
     79 		for(ptr = first; ptr != last->Next; ptr = ptr->Next){
     80 			struct rc_instruction *new = rc_alloc_instruction(c);
     81 			memcpy(new, ptr, sizeof(struct rc_instruction));
     82 			rc_insert_instruction(append_to, new);
     83 			append_to = new;
     84 		}
     85 	}
     86 }
     87 
     88 
     89 static void update_const_value(void * data, struct rc_instruction * inst,
     90 		rc_register_file file, unsigned int index, unsigned int mask)
     91 {
     92 	struct const_value * value = data;
     93 	if(value->Src->File != file ||
     94 	   value->Src->Index != index ||
     95 	   !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
     96 		return;
     97 	}
     98 	switch(inst->U.I.Opcode){
     99 	case RC_OPCODE_MOV:
    100 		if(!rc_src_reg_is_immediate(value->C, inst->U.I.SrcReg[0].File,
    101 						inst->U.I.SrcReg[0].Index)){
    102 			return;
    103 		}
    104 		value->HasValue = 1;
    105 		value->Value =
    106 			rc_get_constant_value(value->C,
    107 					      inst->U.I.SrcReg[0].Index,
    108 					      inst->U.I.SrcReg[0].Swizzle,
    109 					      inst->U.I.SrcReg[0].Negate, 0);
    110 		break;
    111 	}
    112 }
    113 
    114 static void get_incr_amount(void * data, struct rc_instruction * inst,
    115 		rc_register_file file, unsigned int index, unsigned int mask)
    116 {
    117 	struct count_inst * count_inst = data;
    118 	int amnt_src_index;
    119 	const struct rc_opcode_info * opcode;
    120 	float amount;
    121 
    122 	if(file != RC_FILE_TEMPORARY ||
    123 	   count_inst->Index != index ||
    124 	   (1 << GET_SWZ(count_inst->Swz,0) != mask)){
    125 		return;
    126 	}
    127 
    128 	/* XXX: Give up if the counter is modified within an IF block.  We
    129 	 * could handle this case with better analysis. */
    130 	if (count_inst->BranchDepth > 0) {
    131 		count_inst->Unknown = 1;
    132 		return;
    133 	}
    134 
    135 	/* Find the index of the counter register. */
    136 	opcode = rc_get_opcode_info(inst->U.I.Opcode);
    137 	if(opcode->NumSrcRegs != 2){
    138 		count_inst->Unknown = 1;
    139 		return;
    140 	}
    141 	if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
    142 	   inst->U.I.SrcReg[0].Index == count_inst->Index &&
    143 	   inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){
    144 		amnt_src_index = 1;
    145 	} else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
    146 		   inst->U.I.SrcReg[1].Index == count_inst->Index &&
    147 		   inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){
    148 		amnt_src_index = 0;
    149 	}
    150 	else{
    151 		count_inst->Unknown = 1;
    152 		return;
    153 	}
    154 	if(rc_src_reg_is_immediate(count_inst->C,
    155 				inst->U.I.SrcReg[amnt_src_index].File,
    156 				inst->U.I.SrcReg[amnt_src_index].Index)){
    157 		amount = rc_get_constant_value(count_inst->C,
    158 				inst->U.I.SrcReg[amnt_src_index].Index,
    159 				inst->U.I.SrcReg[amnt_src_index].Swizzle,
    160 				inst->U.I.SrcReg[amnt_src_index].Negate, 0);
    161 	}
    162 	else{
    163 		count_inst->Unknown = 1 ;
    164 		return;
    165 	}
    166 	switch(inst->U.I.Opcode){
    167 	case RC_OPCODE_ADD:
    168 		count_inst->Amount += amount;
    169 		break;
    170 	case RC_OPCODE_SUB:
    171 		if(amnt_src_index == 0){
    172 			count_inst->Unknown = 0;
    173 			return;
    174 		}
    175 		count_inst->Amount -= amount;
    176 		break;
    177 	default:
    178 		count_inst->Unknown = 1;
    179 		return;
    180 	}
    181 }
    182 
    183 /**
    184  * If c->max_alu_inst is -1, then all eligible loops will be unrolled regardless
    185  * of how many iterations they have.
    186  */
    187 static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop)
    188 {
    189 	int end_loops;
    190 	int iterations;
    191 	struct count_inst count_inst;
    192 	float limit_value;
    193 	struct rc_src_register * counter;
    194 	struct rc_src_register * limit;
    195 	struct const_value counter_value;
    196 	struct rc_instruction * inst;
    197 
    198 	/* Find the counter and the upper limit */
    199 
    200 	if(rc_src_reg_is_immediate(c, loop->Cond->U.I.SrcReg[0].File,
    201 					loop->Cond->U.I.SrcReg[0].Index)){
    202 		limit = &loop->Cond->U.I.SrcReg[0];
    203 		counter = &loop->Cond->U.I.SrcReg[1];
    204 	}
    205 	else if(rc_src_reg_is_immediate(c, loop->Cond->U.I.SrcReg[1].File,
    206 					loop->Cond->U.I.SrcReg[1].Index)){
    207 		limit = &loop->Cond->U.I.SrcReg[1];
    208 		counter = &loop->Cond->U.I.SrcReg[0];
    209 	}
    210 	else{
    211 		DBG("No constant limit.\n");
    212 		return 0;
    213 	}
    214 
    215 	/* Find the initial value of the counter */
    216 	counter_value.Src = counter;
    217 	counter_value.Value = 0.0f;
    218 	counter_value.HasValue = 0;
    219 	counter_value.C = c;
    220 	for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop;
    221 							inst = inst->Next){
    222 		rc_for_all_writes_mask(inst, update_const_value, &counter_value);
    223 	}
    224 	if(!counter_value.HasValue){
    225 		DBG("Initial counter value cannot be determined.\n");
    226 		return 0;
    227 	}
    228 	DBG("Initial counter value is %f\n", counter_value.Value);
    229 	/* Determine how the counter is modified each loop */
    230 	count_inst.C = c;
    231 	count_inst.Index = counter->Index;
    232 	count_inst.Swz = counter->Swizzle;
    233 	count_inst.Amount = 0.0f;
    234 	count_inst.Unknown = 0;
    235 	count_inst.BranchDepth = 0;
    236 	end_loops = 1;
    237 	for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
    238 		switch(inst->U.I.Opcode){
    239 		/* XXX In the future we might want to try to unroll nested
    240 		 * loops here.*/
    241 		case RC_OPCODE_BGNLOOP:
    242 			end_loops++;
    243 			break;
    244 		case RC_OPCODE_ENDLOOP:
    245 			loop->EndLoop = inst;
    246 			end_loops--;
    247 			break;
    248 		case RC_OPCODE_BRK:
    249 			/* Don't unroll loops if it has a BRK instruction
    250 			 * other one used when testing the main conditional
    251 			 * of the loop. */
    252 
    253 			/* Make sure we haven't entered a nested loops. */
    254 			if(inst != loop->Brk && end_loops == 1) {
    255 				return 0;
    256 			}
    257 			break;
    258 		case RC_OPCODE_IF:
    259 			count_inst.BranchDepth++;
    260 			break;
    261 		case RC_OPCODE_ENDIF:
    262 			count_inst.BranchDepth--;
    263 			break;
    264 		default:
    265 			rc_for_all_writes_mask(inst, get_incr_amount, &count_inst);
    266 			if(count_inst.Unknown){
    267 				return 0;
    268 			}
    269 			break;
    270 		}
    271 	}
    272 	/* Infinite loop */
    273 	if(count_inst.Amount == 0.0f){
    274 		return 0;
    275 	}
    276 	DBG("Counter is increased by %f each iteration.\n", count_inst.Amount);
    277 	/* Calculate the number of iterations of this loop.  Keeping this
    278 	 * simple, since we only support increment and decrement loops.
    279 	 */
    280 	limit_value = rc_get_constant_value(c, limit->Index, limit->Swizzle,
    281 							limit->Negate, 0);
    282 	DBG("Limit is %f.\n", limit_value);
    283 	/* The iteration calculations are opposite of what you would expect.
    284 	 * In a normal loop, if the condition is met, then loop continues, but
    285 	 * with our loops, if the condition is met, the is exited. */
    286 	switch(loop->Cond->U.I.Opcode){
    287 	case RC_OPCODE_SGE:
    288 	case RC_OPCODE_SLE:
    289 		iterations = (int) ceilf((limit_value - counter_value.Value) /
    290 							count_inst.Amount);
    291 		break;
    292 
    293 	case RC_OPCODE_SGT:
    294 	case RC_OPCODE_SLT:
    295 		iterations = (int) floorf((limit_value - counter_value.Value) /
    296 							count_inst.Amount) + 1;
    297 		break;
    298 	default:
    299 		return 0;
    300 	}
    301 
    302 	if (c->max_alu_insts > 0
    303 		&& iterations > loop_max_possible_iterations(c, loop)) {
    304 		return 0;
    305 	}
    306 
    307 	DBG("Loop will have %d iterations.\n", iterations);
    308 
    309 	/* Prepare loop for unrolling */
    310 	rc_remove_instruction(loop->Cond);
    311 	rc_remove_instruction(loop->If);
    312 	rc_remove_instruction(loop->Brk);
    313 	rc_remove_instruction(loop->EndIf);
    314 
    315 	unroll_loop(c, loop, iterations);
    316 	loop->EndLoop = NULL;
    317 	return 1;
    318 }
    319 
    320 /**
    321  * @param c
    322  * @param loop
    323  * @param inst A pointer to a BGNLOOP instruction.
    324  * @return 1 if all of the members of loop where set.
    325  * @return 0 if there was an error and some members of loop are still NULL.
    326  */
    327 static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
    328 						struct rc_instruction * inst)
    329 {
    330 	struct rc_instruction * ptr;
    331 
    332 	if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
    333 		rc_error(c, "%s: expected BGNLOOP", __FUNCTION__);
    334 		return 0;
    335 	}
    336 
    337 	memset(loop, 0, sizeof(struct loop_info));
    338 
    339 	loop->BeginLoop = inst;
    340 
    341 	for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) {
    342 
    343 		if (ptr == &c->Program.Instructions) {
    344 			rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n",
    345 								__FUNCTION__);
    346 			return 0;
    347 		}
    348 
    349 		switch(ptr->U.I.Opcode){
    350 		case RC_OPCODE_BGNLOOP:
    351 		{
    352 			/* Nested loop, skip ahead to the end. */
    353 			unsigned int loop_depth = 1;
    354 			for(ptr = ptr->Next; ptr != &c->Program.Instructions;
    355 							ptr = ptr->Next){
    356 				if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
    357 					loop_depth++;
    358 				} else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
    359 					if (!--loop_depth) {
    360 						break;
    361 					}
    362 				}
    363 			}
    364 			if (ptr == &c->Program.Instructions) {
    365 				rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n",
    366 								__FUNCTION__);
    367 					return 0;
    368 			}
    369 			break;
    370 		}
    371 		case RC_OPCODE_BRK:
    372 		{
    373 			struct rc_src_register *src;
    374 			if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF
    375 					|| ptr->Prev->U.I.Opcode != RC_OPCODE_IF
    376 					|| loop->Brk){
    377 				continue;
    378 			}
    379 			loop->Brk = ptr;
    380 			loop->If = ptr->Prev;
    381 			loop->EndIf = ptr->Next;
    382 			src = &loop->If->U.I.SrcReg[0];
    383 
    384 			for (loop->Cond = loop->If->Prev;
    385 				loop->Cond->U.I.Opcode != RC_OPCODE_BGNLOOP;
    386 				loop->Cond = loop->Cond->Prev) {
    387 
    388 				const struct rc_dst_register *dst = &loop->Cond->U.I.DstReg;
    389 				if (dst->File == src->File &&
    390 						dst->Index == src->Index &&
    391 						dst->WriteMask & (rc_swizzle_to_writemask(src->Swizzle))) {
    392 					if (loop->Cond->U.I.Opcode == RC_OPCODE_CMP) {
    393 						src = &loop->Cond->U.I.SrcReg[0];
    394 						continue;
    395 					}
    396 					break;
    397 				}
    398 			}
    399 
    400 			if (loop->Cond->U.I.Opcode == RC_OPCODE_BGNLOOP) {
    401 				rc_error(c, "%s: Cannot find condition for if\n", __FUNCTION__);
    402 				return 0;
    403 			}
    404 			break;
    405 		}
    406 		case RC_OPCODE_ENDLOOP:
    407 			loop->EndLoop = ptr;
    408 			break;
    409 		}
    410 	}
    411 
    412 	if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf
    413 					&& loop->Cond && loop->EndLoop) {
    414 		return 1;
    415 	}
    416 	return 0;
    417 }
    418 
    419 /**
    420  * This function prepares a loop to be unrolled by converting it into an if
    421  * statement.  Here is an outline of the conversion process:
    422  * BGNLOOP;                         	-> BGNLOOP;
    423  * <Additional conditional code>	-> <Additional conditional code>
    424  * SGE/SLT temp[0], temp[1], temp[2];	-> SLT/SGE temp[0], temp[1], temp[2];
    425  * IF temp[0];                      	-> IF temp[0];
    426  * BRK;                             	->
    427  * ENDIF;                           	-> <Loop Body>
    428  * <Loop Body>                      	-> ENDIF;
    429  * ENDLOOP;                         	-> ENDLOOP
    430  *
    431  * @param inst A pointer to a BGNLOOP instruction.
    432  * @return 1 for success, 0 for failure
    433  */
    434 static int transform_loop(struct emulate_loop_state * s,
    435 						struct rc_instruction * inst)
    436 {
    437 	struct loop_info * loop;
    438 
    439 	memory_pool_array_reserve(&s->C->Pool, struct loop_info,
    440 			s->Loops, s->LoopCount, s->LoopReserved, 1);
    441 
    442 	loop = &s->Loops[s->LoopCount++];
    443 
    444 	if (!build_loop_info(s->C, loop, inst)) {
    445 		rc_error(s->C, "Failed to build loop info\n");
    446 		return 0;
    447 	}
    448 
    449 	if(try_unroll_loop(s->C, loop)){
    450 		return 1;
    451 	}
    452 
    453 	/* Reverse the conditional instruction */
    454 	switch(loop->Cond->U.I.Opcode){
    455 	case RC_OPCODE_SGE:
    456 		loop->Cond->U.I.Opcode = RC_OPCODE_SLT;
    457 		break;
    458 	case RC_OPCODE_SLT:
    459 		loop->Cond->U.I.Opcode = RC_OPCODE_SGE;
    460 		break;
    461 	case RC_OPCODE_SLE:
    462 		loop->Cond->U.I.Opcode = RC_OPCODE_SGT;
    463 		break;
    464 	case RC_OPCODE_SGT:
    465 		loop->Cond->U.I.Opcode = RC_OPCODE_SLE;
    466 		break;
    467 	case RC_OPCODE_SEQ:
    468 		loop->Cond->U.I.Opcode = RC_OPCODE_SNE;
    469 		break;
    470 	case RC_OPCODE_SNE:
    471 		loop->Cond->U.I.Opcode = RC_OPCODE_SEQ;
    472 		break;
    473 	default:
    474 		rc_error(s->C, "loop->Cond is not a conditional.\n");
    475 		return 0;
    476 	}
    477 
    478 	/* Prepare the loop to be emulated */
    479 	rc_remove_instruction(loop->Brk);
    480 	rc_remove_instruction(loop->EndIf);
    481 	rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
    482 	return 1;
    483 }
    484 
    485 void rc_transform_loops(struct radeon_compiler *c, void *user)
    486 {
    487 	struct emulate_loop_state * s = &c->loop_state;
    488 	struct rc_instruction * ptr;
    489 
    490 	memset(s, 0, sizeof(struct emulate_loop_state));
    491 	s->C = c;
    492 	for(ptr = s->C->Program.Instructions.Next;
    493 			ptr != &s->C->Program.Instructions; ptr = ptr->Next) {
    494 		if(ptr->Type == RC_INSTRUCTION_NORMAL &&
    495 					ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
    496 			if (!transform_loop(s, ptr))
    497 				return;
    498 		}
    499 	}
    500 }
    501 
    502 void rc_unroll_loops(struct radeon_compiler *c, void *user)
    503 {
    504 	struct rc_instruction * inst;
    505 	struct loop_info loop;
    506 
    507 	for(inst = c->Program.Instructions.Next;
    508 			inst != &c->Program.Instructions; inst = inst->Next) {
    509 
    510 		if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
    511 			if (build_loop_info(c, &loop, inst)) {
    512 				try_unroll_loop(c, &loop);
    513 			}
    514 		}
    515 	}
    516 }
    517 
    518 void rc_emulate_loops(struct radeon_compiler *c, void *user)
    519 {
    520 	struct emulate_loop_state * s = &c->loop_state;
    521 	int i;
    522 	/* Iterate backwards of the list of loops so that loops that nested
    523 	 * loops are unrolled first.
    524 	 */
    525 	for( i = s->LoopCount - 1; i >= 0; i-- ){
    526 		unsigned int iterations;
    527 
    528 		if(!s->Loops[i].EndLoop){
    529 			continue;
    530 		}
    531 		iterations = loop_max_possible_iterations(s->C, &s->Loops[i]);
    532 		unroll_loop(s->C, &s->Loops[i], iterations);
    533 	}
    534 }
    535