Home | History | Annotate | Download | only in compiler
      1 /*
      2  * Copyright (C) 2009 Nicolai Haehnle.
      3  * Copyright 2011 Tom Stellard <tstellar (at) gmail.com>
      4  *
      5  * All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining
      8  * a copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sublicense, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * The above copyright notice and this permission notice (including the
     16  * next paragraph) shall be included in all copies or substantial
     17  * portions of the Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     26  *
     27  */
     28 
     29 #include "radeon_program_pair.h"
     30 
     31 #include <stdio.h>
     32 
     33 #include "main/glheader.h"
     34 #include "util/register_allocate.h"
     35 #include "util/u_memory.h"
     36 #include "util/ralloc.h"
     37 
     38 #include "r300_fragprog_swizzle.h"
     39 #include "radeon_compiler.h"
     40 #include "radeon_compiler_util.h"
     41 #include "radeon_dataflow.h"
     42 #include "radeon_list.h"
     43 #include "radeon_regalloc.h"
     44 #include "radeon_variable.h"
     45 
     46 #define VERBOSE 0
     47 
     48 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
     49 
     50 
     51 
     52 struct register_info {
     53 	struct live_intervals Live[4];
     54 
     55 	unsigned int Used:1;
     56 	unsigned int Allocated:1;
     57 	unsigned int File:3;
     58 	unsigned int Index:RC_REGISTER_INDEX_BITS;
     59 	unsigned int Writemask;
     60 };
     61 
     62 struct regalloc_state {
     63 	struct radeon_compiler * C;
     64 
     65 	struct register_info * Input;
     66 	unsigned int NumInputs;
     67 
     68 	struct register_info * Temporary;
     69 	unsigned int NumTemporaries;
     70 
     71 	unsigned int Simple;
     72 	int LoopEnd;
     73 };
     74 
     75 struct rc_class {
     76 	enum rc_reg_class ID;
     77 
     78 	unsigned int WritemaskCount;
     79 
     80 	/** List of writemasks that belong to this class */
     81 	unsigned int Writemasks[3];
     82 
     83 
     84 };
     85 
     86 static const struct rc_class rc_class_list [] = {
     87 	{RC_REG_CLASS_SINGLE, 3,
     88 		{RC_MASK_X,
     89 		 RC_MASK_Y,
     90 		 RC_MASK_Z}},
     91 	{RC_REG_CLASS_DOUBLE, 3,
     92 		{RC_MASK_X | RC_MASK_Y,
     93 		 RC_MASK_X | RC_MASK_Z,
     94 		 RC_MASK_Y | RC_MASK_Z}},
     95 	{RC_REG_CLASS_TRIPLE, 1,
     96 		{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
     97 		 RC_MASK_NONE,
     98 		 RC_MASK_NONE}},
     99 	{RC_REG_CLASS_ALPHA, 1,
    100 		{RC_MASK_W,
    101 		 RC_MASK_NONE,
    102 		 RC_MASK_NONE}},
    103 	{RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3,
    104 		{RC_MASK_X | RC_MASK_W,
    105 		 RC_MASK_Y | RC_MASK_W,
    106 		 RC_MASK_Z | RC_MASK_W}},
    107 	{RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3,
    108 		{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
    109 		 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
    110 		 RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
    111 	{RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1,
    112 		{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
    113 		RC_MASK_NONE,
    114 		RC_MASK_NONE}},
    115 	{RC_REG_CLASS_X, 1,
    116 		{RC_MASK_X,
    117 		RC_MASK_NONE,
    118 		RC_MASK_NONE}},
    119 	{RC_REG_CLASS_Y, 1,
    120 		{RC_MASK_Y,
    121 		RC_MASK_NONE,
    122 		RC_MASK_NONE}},
    123 	{RC_REG_CLASS_Z, 1,
    124 		{RC_MASK_Z,
    125 		RC_MASK_NONE,
    126 		RC_MASK_NONE}},
    127 	{RC_REG_CLASS_XY, 1,
    128 		{RC_MASK_X | RC_MASK_Y,
    129 		RC_MASK_NONE,
    130 		RC_MASK_NONE}},
    131 	{RC_REG_CLASS_YZ, 1,
    132 		{RC_MASK_Y | RC_MASK_Z,
    133 		RC_MASK_NONE,
    134 		RC_MASK_NONE}},
    135 	{RC_REG_CLASS_XZ, 1,
    136 		{RC_MASK_X | RC_MASK_Z,
    137 		RC_MASK_NONE,
    138 		RC_MASK_NONE}},
    139 	{RC_REG_CLASS_XW, 1,
    140 		{RC_MASK_X | RC_MASK_W,
    141 		RC_MASK_NONE,
    142 		RC_MASK_NONE}},
    143 	{RC_REG_CLASS_YW, 1,
    144 		{RC_MASK_Y | RC_MASK_W,
    145 		RC_MASK_NONE,
    146 		RC_MASK_NONE}},
    147 	{RC_REG_CLASS_ZW, 1,
    148 		{RC_MASK_Z | RC_MASK_W,
    149 		RC_MASK_NONE,
    150 		RC_MASK_NONE}},
    151 	{RC_REG_CLASS_XYW, 1,
    152 		{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
    153 		RC_MASK_NONE,
    154 		RC_MASK_NONE}},
    155 	{RC_REG_CLASS_YZW, 1,
    156 		{RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
    157 		RC_MASK_NONE,
    158 		RC_MASK_NONE}},
    159 	{RC_REG_CLASS_XZW, 1,
    160 		{RC_MASK_X | RC_MASK_Z | RC_MASK_W,
    161 		RC_MASK_NONE,
    162 		RC_MASK_NONE}}
    163 };
    164 
    165 static void print_live_intervals(struct live_intervals * src)
    166 {
    167 	if (!src || !src->Used) {
    168 		DBG("(null)");
    169 		return;
    170 	}
    171 
    172 	DBG("(%i,%i)", src->Start, src->End);
    173 }
    174 
    175 static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
    176 {
    177 	if (VERBOSE) {
    178 		DBG("overlap_live_intervals: ");
    179 		print_live_intervals(a);
    180 		DBG(" to ");
    181 		print_live_intervals(b);
    182 		DBG("\n");
    183 	}
    184 
    185 	if (!a->Used || !b->Used) {
    186 		DBG("    unused interval\n");
    187 		return 0;
    188 	}
    189 
    190 	if (a->Start > b->Start) {
    191 		if (a->Start < b->End) {
    192 			DBG("    overlap\n");
    193 			return 1;
    194 		}
    195 	} else if (b->Start > a->Start) {
    196 		if (b->Start < a->End) {
    197 			DBG("    overlap\n");
    198 			return 1;
    199 		}
    200 	} else { /* a->Start == b->Start */
    201 		if (a->Start != a->End && b->Start != b->End) {
    202 			DBG("    overlap\n");
    203 			return 1;
    204 		}
    205 	}
    206 
    207 	DBG("    no overlap\n");
    208 
    209 	return 0;
    210 }
    211 
    212 static void scan_read_callback(void * data, struct rc_instruction * inst,
    213 		rc_register_file file, unsigned int index, unsigned int mask)
    214 {
    215 	struct regalloc_state * s = data;
    216 	struct register_info * reg;
    217 	unsigned int i;
    218 
    219 	if (file != RC_FILE_INPUT)
    220 		return;
    221 
    222 	s->Input[index].Used = 1;
    223 	reg = &s->Input[index];
    224 
    225 	for (i = 0; i < 4; i++) {
    226 		if (!((mask >> i) & 0x1)) {
    227 			continue;
    228 		}
    229 		reg->Live[i].Used = 1;
    230 		reg->Live[i].Start = 0;
    231 		reg->Live[i].End =
    232 			s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
    233 	}
    234 }
    235 
    236 static void remap_register(void * data, struct rc_instruction * inst,
    237 		rc_register_file * file, unsigned int * index)
    238 {
    239 	struct regalloc_state * s = data;
    240 	const struct register_info * reg;
    241 
    242 	if (*file == RC_FILE_TEMPORARY && s->Simple)
    243 		reg = &s->Temporary[*index];
    244 	else if (*file == RC_FILE_INPUT)
    245 		reg = &s->Input[*index];
    246 	else
    247 		return;
    248 
    249 	if (reg->Allocated) {
    250 		*index = reg->Index;
    251 	}
    252 }
    253 
    254 static void alloc_input_simple(void * data, unsigned int input,
    255 							unsigned int hwreg)
    256 {
    257 	struct regalloc_state * s = data;
    258 
    259 	if (input >= s->NumInputs)
    260 		return;
    261 
    262 	s->Input[input].Allocated = 1;
    263 	s->Input[input].File = RC_FILE_TEMPORARY;
    264 	s->Input[input].Index = hwreg;
    265 }
    266 
    267 /* This functions offsets the temporary register indices by the number
    268  * of input registers, because input registers are actually temporaries and
    269  * should not occupy the same space.
    270  *
    271  * This pass is supposed to be used to maintain correct allocation of inputs
    272  * if the standard register allocation is disabled. */
    273 static void do_regalloc_inputs_only(struct regalloc_state * s)
    274 {
    275 	for (unsigned i = 0; i < s->NumTemporaries; i++) {
    276 		s->Temporary[i].Allocated = 1;
    277 		s->Temporary[i].File = RC_FILE_TEMPORARY;
    278 		s->Temporary[i].Index = i + s->NumInputs;
    279 	}
    280 }
    281 
    282 static unsigned int is_derivative(rc_opcode op)
    283 {
    284 	return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
    285 }
    286 
    287 static int find_class(
    288 	const struct rc_class * classes,
    289 	unsigned int writemask,
    290 	unsigned int max_writemask_count)
    291 {
    292 	unsigned int i;
    293 	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
    294 		unsigned int j;
    295 		if (classes[i].WritemaskCount > max_writemask_count) {
    296 			continue;
    297 		}
    298 		for (j = 0; j < 3; j++) {
    299 			if (classes[i].Writemasks[j] == writemask) {
    300 				return i;
    301 			}
    302 		}
    303 	}
    304 	return -1;
    305 }
    306 
    307 struct variable_get_class_cb_data {
    308 	unsigned int * can_change_writemask;
    309 	unsigned int conversion_swizzle;
    310 };
    311 
    312 static void variable_get_class_read_cb(
    313 	void * userdata,
    314 	struct rc_instruction * inst,
    315 	struct rc_pair_instruction_arg * arg,
    316 	struct rc_pair_instruction_source * src)
    317 {
    318 	struct variable_get_class_cb_data * d = userdata;
    319 	unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
    320 							d->conversion_swizzle);
    321 	if (!r300_swizzle_is_native_basic(new_swizzle)) {
    322 		*d->can_change_writemask = 0;
    323 	}
    324 }
    325 
    326 static enum rc_reg_class variable_get_class(
    327 	struct rc_variable * variable,
    328 	const struct rc_class * classes)
    329 {
    330 	unsigned int i;
    331 	unsigned int can_change_writemask= 1;
    332 	unsigned int writemask = rc_variable_writemask_sum(variable);
    333 	struct rc_list * readers = rc_variable_readers_union(variable);
    334 	int class_index;
    335 
    336 	if (!variable->C->is_r500) {
    337 		struct rc_class c;
    338 		struct rc_variable * var_ptr;
    339 		/* The assumption here is that if an instruction has type
    340 		 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
    341 		 * r300 and r400 can't swizzle the result of a TEX lookup. */
    342 		for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
    343 			if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
    344 				writemask = RC_MASK_XYZW;
    345 			}
    346 		}
    347 
    348 		/* Check if it is possible to do swizzle packing for r300/r400
    349 		 * without creating non-native swizzles. */
    350 		class_index = find_class(classes, writemask, 3);
    351 		if (class_index < 0) {
    352 			goto error;
    353 		}
    354 		c = classes[class_index];
    355 		if (c.WritemaskCount == 1) {
    356 			goto done;
    357 		}
    358 		for (i = 0; i < c.WritemaskCount; i++) {
    359 			struct rc_variable * var_ptr;
    360 			for (var_ptr = variable; var_ptr;
    361 						var_ptr = var_ptr->Friend) {
    362 				int j;
    363 				unsigned int conversion_swizzle =
    364 						rc_make_conversion_swizzle(
    365 						writemask, c.Writemasks[i]);
    366 				struct variable_get_class_cb_data d;
    367 				d.can_change_writemask = &can_change_writemask;
    368 				d.conversion_swizzle = conversion_swizzle;
    369 				/* If we get this far var_ptr->Inst has to
    370 				 * be a pair instruction.  If variable or any
    371 				 * of its friends are normal instructions,
    372 				 * then the writemask will be set to RC_MASK_XYZW
    373 				 * and the function will return before it gets
    374 				 * here. */
    375 				rc_pair_for_all_reads_arg(var_ptr->Inst,
    376 					variable_get_class_read_cb, &d);
    377 
    378 				for (j = 0; j < var_ptr->ReaderCount; j++) {
    379 					unsigned int old_swizzle;
    380 					unsigned int new_swizzle;
    381 					struct rc_reader r = var_ptr->Readers[j];
    382 					if (r.Inst->Type ==
    383 							RC_INSTRUCTION_PAIR ) {
    384 						old_swizzle = r.U.P.Arg->Swizzle;
    385 					} else {
    386 						/* Source operands of TEX
    387 						 * instructions can't be
    388 						 * swizzle on r300/r400 GPUs.
    389 						 */
    390 						can_change_writemask = 0;
    391 						break;
    392 					}
    393 					new_swizzle = rc_adjust_channels(
    394 						old_swizzle, conversion_swizzle);
    395 					if (!r300_swizzle_is_native_basic(
    396 								new_swizzle)) {
    397 						can_change_writemask = 0;
    398 						break;
    399 					}
    400 				}
    401 				if (!can_change_writemask) {
    402 					break;
    403 				}
    404 			}
    405 			if (!can_change_writemask) {
    406 				break;
    407 			}
    408 		}
    409 	}
    410 
    411 	if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
    412 		/* DDX/DDY seem to always fail when their writemasks are
    413 		 * changed.*/
    414 		if (is_derivative(variable->Inst->U.P.RGB.Opcode)
    415 		    || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
    416 			can_change_writemask = 0;
    417 		}
    418 	}
    419 	for ( ; readers; readers = readers->Next) {
    420 		struct rc_reader * r = readers->Item;
    421 		if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
    422 			if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
    423 				can_change_writemask = 0;
    424 				break;
    425 			}
    426 			/* DDX/DDY also fail when their swizzles are changed. */
    427 			if (is_derivative(r->Inst->U.P.RGB.Opcode)
    428 			    || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
    429 				can_change_writemask = 0;
    430 				break;
    431 			}
    432 		}
    433 	}
    434 
    435 	class_index = find_class(classes, writemask,
    436 						can_change_writemask ? 3 : 1);
    437 done:
    438 	if (class_index > -1) {
    439 		return classes[class_index].ID;
    440 	} else {
    441 error:
    442 		rc_error(variable->C,
    443 				"Could not find class for index=%u mask=%u\n",
    444 				variable->Dst.Index, writemask);
    445 		return 0;
    446 	}
    447 }
    448 
    449 static unsigned int overlap_live_intervals_array(
    450 	struct live_intervals * a,
    451 	struct live_intervals * b)
    452 {
    453 	unsigned int a_chan, b_chan;
    454 	for (a_chan = 0; a_chan < 4; a_chan++) {
    455 		for (b_chan = 0; b_chan < 4; b_chan++) {
    456 			if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
    457 					return 1;
    458 			}
    459 		}
    460 	}
    461 	return 0;
    462 }
    463 
    464 static unsigned int reg_get_index(int reg)
    465 {
    466 	return reg / RC_MASK_XYZW;
    467 }
    468 
    469 static unsigned int reg_get_writemask(int reg)
    470 {
    471 	return (reg % RC_MASK_XYZW) + 1;
    472 }
    473 
    474 static int get_reg_id(unsigned int index, unsigned int writemask)
    475 {
    476 	assert(writemask);
    477 	if (writemask == 0) {
    478 		return 0;
    479 	}
    480 	return (index * RC_MASK_XYZW) + (writemask - 1);
    481 }
    482 
    483 #if VERBOSE
    484 static void print_reg(int reg)
    485 {
    486 	unsigned int index = reg_get_index(reg);
    487 	unsigned int mask = reg_get_writemask(reg);
    488 	fprintf(stderr, "Temp[%u].%c%c%c%c", index,
    489 		mask & RC_MASK_X ? 'x' : '_',
    490 		mask & RC_MASK_Y ? 'y' : '_',
    491 		mask & RC_MASK_Z ? 'z' : '_',
    492 		mask & RC_MASK_W ? 'w' : '_');
    493 }
    494 #endif
    495 
    496 static void add_register_conflicts(
    497 	struct ra_regs * regs,
    498 	unsigned int max_temp_regs)
    499 {
    500 	unsigned int index, a_mask, b_mask;
    501 	for (index = 0; index < max_temp_regs; index++) {
    502 		for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
    503 			for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
    504 								b_mask++) {
    505 				if (a_mask & b_mask) {
    506 					ra_add_reg_conflict(regs,
    507 						get_reg_id(index, a_mask),
    508 						get_reg_id(index, b_mask));
    509 				}
    510 			}
    511 		}
    512 	}
    513 }
    514 
    515 static void do_advanced_regalloc(struct regalloc_state * s)
    516 {
    517 
    518 	unsigned int i, input_node, node_count, node_index;
    519 	unsigned int * node_classes;
    520 	struct rc_instruction * inst;
    521 	struct rc_list * var_ptr;
    522 	struct rc_list * variables;
    523 	struct ra_graph * graph;
    524 	const struct rc_regalloc_state *ra_state = s->C->regalloc_state;
    525 
    526 	/* Get list of program variables */
    527 	variables = rc_get_variables(s->C);
    528 	node_count = rc_list_count(variables);
    529 	node_classes = memory_pool_malloc(&s->C->Pool,
    530 			node_count * sizeof(unsigned int));
    531 
    532 	for (var_ptr = variables, node_index = 0; var_ptr;
    533 					var_ptr = var_ptr->Next, node_index++) {
    534 		unsigned int class_index;
    535 		/* Compute the live intervals */
    536 		rc_variable_compute_live_intervals(var_ptr->Item);
    537 
    538 		class_index = variable_get_class(var_ptr->Item,	rc_class_list);
    539 		node_classes[node_index] = ra_state->class_ids[class_index];
    540 	}
    541 
    542 
    543 	/* Calculate live intervals for input registers */
    544 	for (inst = s->C->Program.Instructions.Next;
    545 					inst != &s->C->Program.Instructions;
    546 					inst = inst->Next) {
    547 		rc_opcode op = rc_get_flow_control_inst(inst);
    548 		if (op == RC_OPCODE_BGNLOOP) {
    549 			struct rc_instruction * endloop =
    550 							rc_match_bgnloop(inst);
    551 			if (endloop->IP > s->LoopEnd) {
    552 				s->LoopEnd = endloop->IP;
    553 			}
    554 		}
    555 		rc_for_all_reads_mask(inst, scan_read_callback, s);
    556 	}
    557 
    558 	/* Compute the writemask for inputs. */
    559 	for (i = 0; i < s->NumInputs; i++) {
    560 		unsigned int chan, writemask = 0;
    561 		for (chan = 0; chan < 4; chan++) {
    562 			if (s->Input[i].Live[chan].Used) {
    563 				writemask |= (1 << chan);
    564 			}
    565 		}
    566 		s->Input[i].Writemask = writemask;
    567 	}
    568 
    569 	graph = ra_alloc_interference_graph(ra_state->regs,
    570 						node_count + s->NumInputs);
    571 
    572 	for (node_index = 0; node_index < node_count; node_index++) {
    573 		ra_set_node_class(graph, node_index, node_classes[node_index]);
    574 	}
    575 
    576 	/* Build the interference graph */
    577 	for (var_ptr = variables, node_index = 0; var_ptr;
    578 					var_ptr = var_ptr->Next,node_index++) {
    579 		struct rc_list * a, * b;
    580 		unsigned int b_index;
    581 
    582 		for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
    583 						b; b = b->Next, b_index++) {
    584 			struct rc_variable * var_a = a->Item;
    585 			while (var_a) {
    586 				struct rc_variable * var_b = b->Item;
    587 				while (var_b) {
    588 					if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
    589 						ra_add_node_interference(graph,
    590 							node_index, b_index);
    591 					}
    592 					var_b = var_b->Friend;
    593 				}
    594 				var_a = var_a->Friend;
    595 			}
    596 		}
    597 	}
    598 
    599 	/* Add input registers to the interference graph */
    600 	for (i = 0, input_node = 0; i< s->NumInputs; i++) {
    601 		if (!s->Input[i].Writemask) {
    602 			continue;
    603 		}
    604 		for (var_ptr = variables, node_index = 0;
    605 				var_ptr; var_ptr = var_ptr->Next, node_index++) {
    606 			struct rc_variable * var = var_ptr->Item;
    607 			if (overlap_live_intervals_array(s->Input[i].Live,
    608 								var->Live)) {
    609 				ra_add_node_interference(graph, node_index,
    610 						node_count + input_node);
    611 			}
    612 		}
    613 		/* Manually allocate a register for this input */
    614 		ra_set_node_reg(graph, node_count + input_node, get_reg_id(
    615 				s->Input[i].Index, s->Input[i].Writemask));
    616 		input_node++;
    617 	}
    618 
    619 	if (!ra_allocate(graph)) {
    620 		rc_error(s->C, "Ran out of hardware temporaries\n");
    621 		return;
    622 	}
    623 
    624 	/* Rewrite the registers */
    625 	for (var_ptr = variables, node_index = 0; var_ptr;
    626 				var_ptr = var_ptr->Next, node_index++) {
    627 		int reg = ra_get_node_reg(graph, node_index);
    628 		unsigned int writemask = reg_get_writemask(reg);
    629 		unsigned int index = reg_get_index(reg);
    630 		struct rc_variable * var = var_ptr->Item;
    631 
    632 		if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
    633 			writemask = rc_variable_writemask_sum(var);
    634 		}
    635 
    636 		if (var->Dst.File == RC_FILE_INPUT) {
    637 			continue;
    638 		}
    639 		rc_variable_change_dst(var, index, writemask);
    640 	}
    641 
    642 	ralloc_free(graph);
    643 }
    644 
    645 void rc_init_regalloc_state(struct rc_regalloc_state *s)
    646 {
    647 	unsigned i, j, index;
    648 	unsigned **ra_q_values;
    649 
    650 	/* Pre-computed q values.  This array describes the maximum number of
    651 	 * a class's [row] registers that are in conflict with a single
    652 	 * register from another class [column].
    653 	 *
    654 	 * For example:
    655 	 * q_values[0][2] is 3, because a register from class 2
    656 	 * (RC_REG_CLASS_TRIPLE) may conflict with at most 3 registers from
    657 	 * class 0 (RC_REG_CLASS_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
    658 	 * and T0.z.
    659 	 *
    660 	 * q_values[2][0] is 1, because a register from class 0
    661 	 * (RC_REG_CLASS_SINGLE) may conflict with at most 1 register from
    662 	 * class 2 (RC_REG_CLASS_TRIPLE) e.g. T0.x conflicts with T0.xyz
    663 	 *
    664 	 * The q values for each register class [row] will never be greater
    665 	 * than the maximum number of writemask combinations for that class.
    666 	 *
    667 	 * For example:
    668 	 *
    669 	 * Class 2 (RC_REG_CLASS_TRIPLE) only has 1 writemask combination,
    670 	 * so no value in q_values[2][0..RC_REG_CLASS_COUNT] will be greater
    671 	 * than 1.
    672 	 */
    673 	const unsigned q_values[RC_REG_CLASS_COUNT][RC_REG_CLASS_COUNT] = {
    674 	{1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
    675 	{2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
    676 	{1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
    677 	{0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
    678 	{1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
    679 	{2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
    680 	{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
    681 	{1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
    682 	{1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
    683 	{1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
    684 	{1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
    685 	{1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
    686 	{1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
    687 	{1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
    688 	{1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
    689 	{1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
    690 	{1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
    691 	{1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
    692 	{1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
    693 	};
    694 
    695 	/* Allocate the main ra data structure */
    696 	s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW,
    697                                    true);
    698 
    699 	/* Create the register classes */
    700 	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
    701 		const struct rc_class *class = &rc_class_list[i];
    702 		s->class_ids[class->ID] = ra_alloc_reg_class(s->regs);
    703 
    704 		/* Assign registers to the classes */
    705 		for (index = 0; index < R500_PFS_NUM_TEMP_REGS; index++) {
    706 			for (j = 0; j < class->WritemaskCount; j++) {
    707 				int reg_id = get_reg_id(index,
    708 						class->Writemasks[j]);
    709 				ra_class_add_reg(s->regs,
    710 					s->class_ids[class->ID], reg_id);
    711 			}
    712 		}
    713 	}
    714 
    715 	/* Set the q values.  The q_values array is indexed based on
    716 	 * the rc_reg_class ID (RC_REG_CLASS_*) which might be
    717 	 * different than the ID assigned to that class by ra.
    718 	 * This why we need to manually construct this list.
    719 	 */
    720 	ra_q_values = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned *));
    721 
    722 	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
    723 		ra_q_values[i] = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned));
    724 		for (j = 0; j < RC_REG_CLASS_COUNT; j++) {
    725 			ra_q_values[s->class_ids[i]][s->class_ids[j]] =
    726 							q_values[i][j];
    727 		}
    728 	}
    729 
    730 	/* Add register conflicts */
    731 	add_register_conflicts(s->regs, R500_PFS_NUM_TEMP_REGS);
    732 
    733 	ra_set_finalize(s->regs, ra_q_values);
    734 
    735 	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
    736 		FREE(ra_q_values[i]);
    737 	}
    738 	FREE(ra_q_values);
    739 }
    740 
    741 void rc_destroy_regalloc_state(struct rc_regalloc_state *s)
    742 {
    743 	ralloc_free(s->regs);
    744 }
    745 
    746 /**
    747  * @param user This parameter should be a pointer to an integer value.  If this
    748  * integer value is zero, then a simple register allocator will be used that
    749  * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
    750  * user is non-zero, then the regular register allocator will be used
    751  * (\sa do_regalloc).
    752   */
    753 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
    754 {
    755 	struct r300_fragment_program_compiler *c =
    756 				(struct r300_fragment_program_compiler*)cc;
    757 	struct regalloc_state s;
    758 	int * do_full_regalloc = (int*)user;
    759 
    760 	memset(&s, 0, sizeof(s));
    761 	s.C = cc;
    762 	s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
    763 	s.Input = memory_pool_malloc(&cc->Pool,
    764 			s.NumInputs * sizeof(struct register_info));
    765 	memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
    766 
    767 	s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
    768 	s.Temporary = memory_pool_malloc(&cc->Pool,
    769 			s.NumTemporaries * sizeof(struct register_info));
    770 	memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
    771 
    772 	rc_recompute_ips(s.C);
    773 
    774 	c->AllocateHwInputs(c, &alloc_input_simple, &s);
    775 	if (*do_full_regalloc) {
    776 		do_advanced_regalloc(&s);
    777 	} else {
    778 		s.Simple = 1;
    779 		do_regalloc_inputs_only(&s);
    780 	}
    781 
    782 	/* Rewrite inputs and if we are doing the simple allocation, rewrite
    783 	 * temporaries too. */
    784 	for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
    785 					inst != &s.C->Program.Instructions;
    786 					inst = inst->Next) {
    787 		rc_remap_registers(inst, &remap_register, &s);
    788 	}
    789 }
    790