Home | History | Annotate | Download | only in compiler
      1 /*
      2  * Copyright (C) 2009 Nicolai Haehnle.
      3  * Copyright 2011 Tom Stellard <tstellar (at) gmail.com>
      4  *
      5  * All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining
      8  * a copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sublicense, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * The above copyright notice and this permission notice (including the
     16  * next paragraph) shall be included in all copies or substantial
     17  * portions of the Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     26  *
     27  */
     28 
     29 #include "radeon_program_pair.h"
     30 
     31 #include <stdio.h>
     32 
     33 #include "main/glheader.h"
     34 #include "program/register_allocate.h"
     35 #include "ralloc.h"
     36 
     37 #include "r300_fragprog_swizzle.h"
     38 #include "radeon_compiler.h"
     39 #include "radeon_compiler_util.h"
     40 #include "radeon_dataflow.h"
     41 #include "radeon_list.h"
     42 #include "radeon_variable.h"
     43 
/* Set to 1 to make DBG() (and the other VERBOSE-guarded code in this file)
 * emit trace output on stderr. */
#define VERBOSE 0

/* printf-style debug trace; expands to a no-op branch when VERBOSE is 0. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
     47 
     48 
     49 
/** Allocation state tracked for one input or temporary register. */
struct register_info {
	/** One live interval per channel (indexed 0..3 by channel bit). */
	struct live_intervals Live[4];

	/** Set once the register has been seen as a read source. */
	unsigned int Used:1;
	/** Set once File/Index hold an assigned location; remap_register()
	 * only rewrites registers that have this bit set. */
	unsigned int Allocated:1;
	/** Register file assigned to this register. */
	unsigned int File:3;
	/** Register index assigned to this register. */
	unsigned int Index:RC_REGISTER_INDEX_BITS;
	/** Mask of the channels whose live interval is in use; filled in by
	 * do_advanced_regalloc() for input registers. */
	unsigned int Writemask;
};
     59 
/** State shared by the register allocation pass and its callbacks. */
struct regalloc_state {
	/** Compiler whose program is being allocated. */
	struct radeon_compiler * C;

	/** Per-input-register info, NumInputs entries. */
	struct register_info * Input;
	unsigned int NumInputs;

	/** Per-temporary-register info, NumTemporaries entries. */
	struct register_info * Temporary;
	unsigned int NumTemporaries;

	/** Non-zero when only the inputs-only allocation is done; in that
	 * mode remap_register() also rewrites temporaries. */
	unsigned int Simple;
	/** Largest IP of a loop end seen so far while scanning the program;
	 * input live intervals are extended at least this far. */
	int LoopEnd;
};
     72 
/**
 * Register classes, named after the channel combination a variable writes.
 *
 * do_advanced_regalloc() uses these values as indices into its class table,
 * so that table must list the classes in exactly this order.
 */
enum rc_reg_class {
	RC_REG_CLASS_SINGLE,
	RC_REG_CLASS_DOUBLE,
	RC_REG_CLASS_TRIPLE,
	RC_REG_CLASS_ALPHA,
	RC_REG_CLASS_SINGLE_PLUS_ALPHA,
	RC_REG_CLASS_DOUBLE_PLUS_ALPHA,
	RC_REG_CLASS_TRIPLE_PLUS_ALPHA,
	RC_REG_CLASS_X,
	RC_REG_CLASS_Y,
	RC_REG_CLASS_Z,
	RC_REG_CLASS_XY,
	RC_REG_CLASS_YZ,
	RC_REG_CLASS_XZ,
	RC_REG_CLASS_XW,
	RC_REG_CLASS_YW,
	RC_REG_CLASS_ZW,
	RC_REG_CLASS_XYW,
	RC_REG_CLASS_YZW,
	RC_REG_CLASS_XZW,
	/** Number of classes; not a class itself. */
	RC_REG_CLASS_COUNT
};
     95 
/**
 * Description of one register class.
 *
 * Instances are built with positional initialisers, so the field order
 * below must not change.
 */
struct rc_class {
	/** Which class this entry describes. */
	enum rc_reg_class Class;

	/** Number of leading entries of Writemasks that are handed to the
	 * register allocator for this class. */
	unsigned int WritemaskCount;

	/** This is 1 if this class is being used by the register allocator
	 * and 0 otherwise */
	unsigned int Used;

	/** This is the ID number assigned to this class by ra. */
	unsigned int Id;

	/** List of writemasks that belong to this class */
	unsigned int Writemasks[3];


};
    113 
    114 static void print_live_intervals(struct live_intervals * src)
    115 {
    116 	if (!src || !src->Used) {
    117 		DBG("(null)");
    118 		return;
    119 	}
    120 
    121 	DBG("(%i,%i)", src->Start, src->End);
    122 }
    123 
    124 static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
    125 {
    126 	if (VERBOSE) {
    127 		DBG("overlap_live_intervals: ");
    128 		print_live_intervals(a);
    129 		DBG(" to ");
    130 		print_live_intervals(b);
    131 		DBG("\n");
    132 	}
    133 
    134 	if (!a->Used || !b->Used) {
    135 		DBG("    unused interval\n");
    136 		return 0;
    137 	}
    138 
    139 	if (a->Start > b->Start) {
    140 		if (a->Start < b->End) {
    141 			DBG("    overlap\n");
    142 			return 1;
    143 		}
    144 	} else if (b->Start > a->Start) {
    145 		if (b->Start < a->End) {
    146 			DBG("    overlap\n");
    147 			return 1;
    148 		}
    149 	} else { /* a->Start == b->Start */
    150 		if (a->Start != a->End && b->Start != b->End) {
    151 			DBG("    overlap\n");
    152 			return 1;
    153 		}
    154 	}
    155 
    156 	DBG("    no overlap\n");
    157 
    158 	return 0;
    159 }
    160 
    161 static void scan_read_callback(void * data, struct rc_instruction * inst,
    162 		rc_register_file file, unsigned int index, unsigned int mask)
    163 {
    164 	struct regalloc_state * s = data;
    165 	struct register_info * reg;
    166 	unsigned int i;
    167 
    168 	if (file != RC_FILE_INPUT)
    169 		return;
    170 
    171 	s->Input[index].Used = 1;
    172 	reg = &s->Input[index];
    173 
    174 	for (i = 0; i < 4; i++) {
    175 		if (!((mask >> i) & 0x1)) {
    176 			continue;
    177 		}
    178 		reg->Live[i].Used = 1;
    179 		reg->Live[i].Start = 0;
    180 		reg->Live[i].End =
    181 			s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
    182 	}
    183 }
    184 
    185 static void remap_register(void * data, struct rc_instruction * inst,
    186 		rc_register_file * file, unsigned int * index)
    187 {
    188 	struct regalloc_state * s = data;
    189 	const struct register_info * reg;
    190 
    191 	if (*file == RC_FILE_TEMPORARY && s->Simple)
    192 		reg = &s->Temporary[*index];
    193 	else if (*file == RC_FILE_INPUT)
    194 		reg = &s->Input[*index];
    195 	else
    196 		return;
    197 
    198 	if (reg->Allocated) {
    199 		*index = reg->Index;
    200 	}
    201 }
    202 
    203 static void alloc_input_simple(void * data, unsigned int input,
    204 							unsigned int hwreg)
    205 {
    206 	struct regalloc_state * s = data;
    207 
    208 	if (input >= s->NumInputs)
    209 		return;
    210 
    211 	s->Input[input].Allocated = 1;
    212 	s->Input[input].File = RC_FILE_TEMPORARY;
    213 	s->Input[input].Index = hwreg;
    214 }
    215 
    216 /* This functions offsets the temporary register indices by the number
    217  * of input registers, because input registers are actually temporaries and
    218  * should not occupy the same space.
    219  *
    220  * This pass is supposed to be used to maintain correct allocation of inputs
    221  * if the standard register allocation is disabled. */
    222 static void do_regalloc_inputs_only(struct regalloc_state * s)
    223 {
    224 	for (unsigned i = 0; i < s->NumTemporaries; i++) {
    225 		s->Temporary[i].Allocated = 1;
    226 		s->Temporary[i].File = RC_FILE_TEMPORARY;
    227 		s->Temporary[i].Index = i + s->NumInputs;
    228 	}
    229 }
    230 
    231 static unsigned int is_derivative(rc_opcode op)
    232 {
    233 	return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
    234 }
    235 
    236 static int find_class(
    237 	struct rc_class * classes,
    238 	unsigned int writemask,
    239 	unsigned int max_writemask_count)
    240 {
    241 	unsigned int i;
    242 	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
    243 		unsigned int j;
    244 		if (classes[i].WritemaskCount > max_writemask_count) {
    245 			continue;
    246 		}
    247 		for (j = 0; j < 3; j++) {
    248 			if (classes[i].Writemasks[j] == writemask) {
    249 				return i;
    250 			}
    251 		}
    252 	}
    253 	return -1;
    254 }
    255 
/** User data handed to variable_get_class_read_cb(). */
struct variable_get_class_cb_data {
	/** Flag the callback clears (sets to 0) when a converted swizzle is
	 * not native. */
	unsigned int * can_change_writemask;
	/** Conversion swizzle applied to each argument swizzle before it is
	 * tested. */
	unsigned int conversion_swizzle;
};
    260 
    261 static void variable_get_class_read_cb(
    262 	void * userdata,
    263 	struct rc_instruction * inst,
    264 	struct rc_pair_instruction_arg * arg,
    265 	struct rc_pair_instruction_source * src)
    266 {
    267 	struct variable_get_class_cb_data * d = userdata;
    268 	unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
    269 							d->conversion_swizzle);
    270 	if (!r300_swizzle_is_native_basic(new_swizzle)) {
    271 		*d->can_change_writemask = 0;
    272 	}
    273 }
    274 
/**
 * Choose the register class for a variable and its friends.
 *
 * The class is looked up from the variable's combined writemask.  If the
 * writemask may be changed, classes with up to three alternative writemasks
 * are eligible; otherwise only classes with a single writemask are.
 *
 * @param variable Variable to classify.
 * @param classes  Table of RC_REG_CLASS_COUNT register classes.
 * @return The Class of the matching table entry.  If no class matches, an
 * error is reported through rc_error() and 0 is returned.
 */
static enum rc_reg_class variable_get_class(
	struct rc_variable * variable,
	struct rc_class * classes)
{
	unsigned int i;
	unsigned int can_change_writemask= 1;
	unsigned int writemask = rc_variable_writemask_sum(variable);
	struct rc_list * readers = rc_variable_readers_union(variable);
	int class_index;

	if (!variable->C->is_r500) {
		struct rc_class c;
		struct rc_variable * var_ptr;
		/* The assumption here is that if an instruction has type
		 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
		 * r300 and r400 can't swizzle the result of a TEX lookup. */
		for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
			if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
				writemask = RC_MASK_XYZW;
			}
		}

		/* Check if it is possible to do swizzle packing for r300/r400
		 * without creating non-native swizzles. */
		class_index = find_class(classes, writemask, 3);
		if (class_index < 0) {
			goto error;
		}
		c = classes[class_index];
		/* A class with a single writemask offers no alternative
		 * placement, so the class found above is final. */
		if (c.WritemaskCount == 1) {
			goto done;
		}
		/* Try every alternative writemask of the class; give up on
		 * changing the writemask as soon as one conversion would
		 * produce a non-native swizzle. */
		for (i = 0; i < c.WritemaskCount; i++) {
			struct rc_variable * var_ptr;
			for (var_ptr = variable; var_ptr;
						var_ptr = var_ptr->Friend) {
				int j;
				unsigned int conversion_swizzle =
						rc_make_conversion_swizzle(
						writemask, c.Writemasks[i]);
				struct variable_get_class_cb_data d;
				d.can_change_writemask = &can_change_writemask;
				d.conversion_swizzle = conversion_swizzle;
				/* If we get this far var_ptr->Inst has to
				 * be a pair instruction.  If variable or any
				 * of its friends are normal instructions,
				 * then the writemask will be set to RC_MASK_XYZW
				 * and the function will return before it gets
				 * here. */
				rc_pair_for_all_reads_arg(var_ptr->Inst,
					variable_get_class_read_cb, &d);

				/* Apply the same test to the swizzle of every
				 * reader of this variable. */
				for (j = 0; j < var_ptr->ReaderCount; j++) {
					unsigned int old_swizzle;
					unsigned int new_swizzle;
					struct rc_reader r = var_ptr->Readers[j];
					if (r.Inst->Type ==
							RC_INSTRUCTION_PAIR ) {
						old_swizzle = r.U.P.Arg->Swizzle;
					} else {
						old_swizzle = r.U.I.Src->Swizzle;
					}
					new_swizzle = rc_adjust_channels(
						old_swizzle, conversion_swizzle);
					if (!r300_swizzle_is_native_basic(
								new_swizzle)) {
						can_change_writemask = 0;
						break;
					}
				}
				if (!can_change_writemask) {
					break;
				}
			}
			if (!can_change_writemask) {
				break;
			}
		}
	}

	if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
		/* DDX/DDY seem to always fail when their writemasks are
		 * changed.*/
		if (is_derivative(variable->Inst->U.P.RGB.Opcode)
		    || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
			can_change_writemask = 0;
		}
	}
	for ( ; readers; readers = readers->Next) {
		struct rc_reader * r = readers->Item;
		if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
			/* A reader that takes the value through the presubtract
			 * source also pins the writemask. */
			if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
				can_change_writemask = 0;
				break;
			}
			/* DDX/DDY also fail when their swizzles are changed. */
			if (is_derivative(r->Inst->U.P.RGB.Opcode)
			    || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
				can_change_writemask = 0;
				break;
			}
		}
	}

	/* Final lookup: multi-writemask classes are only eligible when the
	 * writemask may still be changed. */
	class_index = find_class(classes, writemask,
						can_change_writemask ? 3 : 1);
done:
	if (class_index > -1) {
		return classes[class_index].Class;
	} else {
		/* Also reached directly via "goto error" from the r300/r400
		 * path above. */
error:
		rc_error(variable->C,
				"Could not find class for index=%u mask=%u\n",
				variable->Dst.Index, writemask);
		return 0;
	}
}
    392 
    393 static unsigned int overlap_live_intervals_array(
    394 	struct live_intervals * a,
    395 	struct live_intervals * b)
    396 {
    397 	unsigned int a_chan, b_chan;
    398 	for (a_chan = 0; a_chan < 4; a_chan++) {
    399 		for (b_chan = 0; b_chan < 4; b_chan++) {
    400 			if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
    401 					return 1;
    402 			}
    403 		}
    404 	}
    405 	return 0;
    406 }
    407 
    408 static unsigned int reg_get_index(int reg)
    409 {
    410 	return reg / RC_MASK_XYZW;
    411 }
    412 
    413 static unsigned int reg_get_writemask(int reg)
    414 {
    415 	return (reg % RC_MASK_XYZW) + 1;
    416 }
    417 
    418 static int get_reg_id(unsigned int index, unsigned int writemask)
    419 {
    420 	assert(writemask);
    421 	if (writemask == 0) {
    422 		return 0;
    423 	}
    424 	return (index * RC_MASK_XYZW) + (writemask - 1);
    425 }
    426 
    427 #if VERBOSE
    428 static void print_reg(int reg)
    429 {
    430 	unsigned int index = reg_get_index(reg);
    431 	unsigned int mask = reg_get_writemask(reg);
    432 	fprintf(stderr, "Temp[%u].%c%c%c%c", index,
    433 		mask & RC_MASK_X ? 'x' : '_',
    434 		mask & RC_MASK_Y ? 'y' : '_',
    435 		mask & RC_MASK_Z ? 'z' : '_',
    436 		mask & RC_MASK_W ? 'w' : '_');
    437 }
    438 #endif
    439 
    440 static void add_register_conflicts(
    441 	struct ra_regs * regs,
    442 	unsigned int max_temp_regs)
    443 {
    444 	unsigned int index, a_mask, b_mask;
    445 	for (index = 0; index < max_temp_regs; index++) {
    446 		for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
    447 			for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
    448 								b_mask++) {
    449 				if (a_mask & b_mask) {
    450 					ra_add_reg_conflict(regs,
    451 						get_reg_id(index, a_mask),
    452 						get_reg_id(index, b_mask));
    453 				}
    454 			}
    455 		}
    456 	}
    457 }
    458 
    459 static void do_advanced_regalloc(struct regalloc_state * s)
    460 {
    461 	struct rc_class rc_class_list [] = {
    462 		{RC_REG_CLASS_SINGLE, 3, 0, 0,
    463 			{RC_MASK_X,
    464 			 RC_MASK_Y,
    465 			 RC_MASK_Z}},
    466 		{RC_REG_CLASS_DOUBLE, 3, 0, 0,
    467 			{RC_MASK_X | RC_MASK_Y,
    468 			 RC_MASK_X | RC_MASK_Z,
    469 			 RC_MASK_Y | RC_MASK_Z}},
    470 		{RC_REG_CLASS_TRIPLE, 1, 0, 0,
    471 			{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
    472 			 RC_MASK_NONE,
    473 			 RC_MASK_NONE}},
    474 		{RC_REG_CLASS_ALPHA, 1, 0, 0,
    475 			{RC_MASK_W,
    476 			 RC_MASK_NONE,
    477 			 RC_MASK_NONE}},
    478 		{RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0,
    479 			{RC_MASK_X | RC_MASK_W,
    480 			 RC_MASK_Y | RC_MASK_W,
    481 			 RC_MASK_Z | RC_MASK_W}},
    482 		{RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0,
    483 			{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
    484 			 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
    485 			 RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
    486 		{RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0,
    487 			{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
    488 			RC_MASK_NONE,
    489 			RC_MASK_NONE}},
    490 		{RC_REG_CLASS_X, 1, 0, 0,
    491 			{RC_MASK_X,
    492 			RC_MASK_NONE,
    493 			RC_MASK_NONE}},
    494 		{RC_REG_CLASS_Y, 1, 0, 0,
    495 			{RC_MASK_Y,
    496 			RC_MASK_NONE,
    497 			RC_MASK_NONE}},
    498 		{RC_REG_CLASS_Z, 1, 0, 0,
    499 			{RC_MASK_Z,
    500 			RC_MASK_NONE,
    501 			RC_MASK_NONE}},
    502 		{RC_REG_CLASS_XY, 1, 0, 0,
    503 			{RC_MASK_X | RC_MASK_Y,
    504 			RC_MASK_NONE,
    505 			RC_MASK_NONE}},
    506 		{RC_REG_CLASS_YZ, 1, 0, 0,
    507 			{RC_MASK_Y | RC_MASK_Z,
    508 			RC_MASK_NONE,
    509 			RC_MASK_NONE}},
    510 		{RC_REG_CLASS_XZ, 1, 0, 0,
    511 			{RC_MASK_X | RC_MASK_Z,
    512 			RC_MASK_NONE,
    513 			RC_MASK_NONE}},
    514 		{RC_REG_CLASS_XW, 1, 0, 0,
    515 			{RC_MASK_X | RC_MASK_W,
    516 			RC_MASK_NONE,
    517 			RC_MASK_NONE}},
    518 		{RC_REG_CLASS_YW, 1, 0, 0,
    519 			{RC_MASK_Y | RC_MASK_W,
    520 			RC_MASK_NONE,
    521 			RC_MASK_NONE}},
    522 		{RC_REG_CLASS_ZW, 1, 0, 0,
    523 			{RC_MASK_Z | RC_MASK_W,
    524 			RC_MASK_NONE,
    525 			RC_MASK_NONE}},
    526 		{RC_REG_CLASS_XYW, 1, 0, 0,
    527 			{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
    528 			RC_MASK_NONE,
    529 			RC_MASK_NONE}},
    530 		{RC_REG_CLASS_YZW, 1, 0, 0,
    531 			{RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
    532 			RC_MASK_NONE,
    533 			RC_MASK_NONE}},
    534 		{RC_REG_CLASS_XZW, 1, 0, 0,
    535 			{RC_MASK_X | RC_MASK_Z | RC_MASK_W,
    536 			RC_MASK_NONE,
    537 			RC_MASK_NONE}}
    538 	};
    539 
    540 	unsigned int i, j, index, input_node, node_count, node_index;
    541 	unsigned int * node_classes;
    542 	unsigned int * input_classes;
    543 	struct rc_instruction * inst;
    544 	struct rc_list * var_ptr;
    545 	struct rc_list * variables;
    546 	struct ra_regs * regs;
    547 	struct ra_graph * graph;
    548 
    549 	/* Allocate the main ra data structure */
    550 	regs = ra_alloc_reg_set(NULL, s->C->max_temp_regs * RC_MASK_XYZW);
    551 
    552 	/* Get list of program variables */
    553 	variables = rc_get_variables(s->C);
    554 	node_count = rc_list_count(variables);
    555 	node_classes = memory_pool_malloc(&s->C->Pool,
    556 			node_count * sizeof(unsigned int));
    557 	input_classes = memory_pool_malloc(&s->C->Pool,
    558 			s->NumInputs * sizeof(unsigned int));
    559 
    560 	for (var_ptr = variables, node_index = 0; var_ptr;
    561 					var_ptr = var_ptr->Next, node_index++) {
    562 		unsigned int class_index;
    563 		/* Compute the live intervals */
    564 		rc_variable_compute_live_intervals(var_ptr->Item);
    565 
    566 		class_index = variable_get_class(var_ptr->Item,	rc_class_list);
    567 
    568 		/* If we haven't used this register class yet, mark it
    569 		 * as used and allocate space for it. */
    570 		if (!rc_class_list[class_index].Used) {
    571 			rc_class_list[class_index].Used = 1;
    572 			rc_class_list[class_index].Id = ra_alloc_reg_class(regs);
    573 		}
    574 
    575 		node_classes[node_index] = rc_class_list[class_index].Id;
    576 	}
    577 
    578 
    579 	/* Assign registers to the classes */
    580 	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
    581 		struct rc_class class = rc_class_list[i];
    582 		if (!class.Used) {
    583 			continue;
    584 		}
    585 
    586 		for (index = 0; index < s->C->max_temp_regs; index++) {
    587 			for (j = 0; j < class.WritemaskCount; j++) {
    588 				int reg_id = get_reg_id(index,
    589 							class.Writemasks[j]);
    590 				ra_class_add_reg(regs, class.Id, reg_id);
    591 			}
    592 		}
    593 	}
    594 
    595 	/* Add register conflicts */
    596 	add_register_conflicts(regs, s->C->max_temp_regs);
    597 
    598 	/* Calculate live intervals for input registers */
    599 	for (inst = s->C->Program.Instructions.Next;
    600 					inst != &s->C->Program.Instructions;
    601 					inst = inst->Next) {
    602 		rc_opcode op = rc_get_flow_control_inst(inst);
    603 		if (op == RC_OPCODE_BGNLOOP) {
    604 			struct rc_instruction * endloop =
    605 							rc_match_bgnloop(inst);
    606 			if (endloop->IP > s->LoopEnd) {
    607 				s->LoopEnd = endloop->IP;
    608 			}
    609 		}
    610 		rc_for_all_reads_mask(inst, scan_read_callback, s);
    611 	}
    612 
    613 	/* Create classes for input registers */
    614 	for (i = 0; i < s->NumInputs; i++) {
    615 		unsigned int chan, class_id, writemask = 0;
    616 		for (chan = 0; chan < 4; chan++) {
    617 			if (s->Input[i].Live[chan].Used) {
    618 				writemask |= (1 << chan);
    619 			}
    620 		}
    621 		s->Input[i].Writemask = writemask;
    622 		if (!writemask) {
    623 			continue;
    624 		}
    625 
    626 		class_id = ra_alloc_reg_class(regs);
    627 		input_classes[i] = class_id;
    628 		ra_class_add_reg(regs, class_id,
    629 				get_reg_id(s->Input[i].Index, writemask));
    630 	}
    631 
    632 	ra_set_finalize(regs);
    633 
    634 	graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs);
    635 
    636 	/* Build the interference graph */
    637 	for (var_ptr = variables, node_index = 0; var_ptr;
    638 					var_ptr = var_ptr->Next,node_index++) {
    639 		struct rc_list * a, * b;
    640 		unsigned int b_index;
    641 
    642 		ra_set_node_class(graph, node_index, node_classes[node_index]);
    643 
    644 		for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
    645 						b; b = b->Next, b_index++) {
    646 			struct rc_variable * var_a = a->Item;
    647 			while (var_a) {
    648 				struct rc_variable * var_b = b->Item;
    649 				while (var_b) {
    650 					if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
    651 						ra_add_node_interference(graph,
    652 							node_index, b_index);
    653 					}
    654 					var_b = var_b->Friend;
    655 				}
    656 				var_a = var_a->Friend;
    657 			}
    658 		}
    659 	}
    660 
    661 	/* Add input registers to the interference graph */
    662 	for (i = 0, input_node = 0; i< s->NumInputs; i++) {
    663 		if (!s->Input[i].Writemask) {
    664 			continue;
    665 		}
    666 		ra_set_node_class(graph, node_count + input_node,
    667 							input_classes[i]);
    668 		for (var_ptr = variables, node_index = 0;
    669 				var_ptr; var_ptr = var_ptr->Next, node_index++) {
    670 			struct rc_variable * var = var_ptr->Item;
    671 			if (overlap_live_intervals_array(s->Input[i].Live,
    672 								var->Live)) {
    673 				ra_add_node_interference(graph, node_index,
    674 						node_count + input_node);
    675 			}
    676 		}
    677 		/* Manually allocate a register for this input */
    678 		ra_set_node_reg(graph, node_count + input_node, get_reg_id(
    679 				s->Input[i].Index, s->Input[i].Writemask));
    680 		input_node++;
    681 	}
    682 
    683 	if (!ra_allocate_no_spills(graph)) {
    684 		rc_error(s->C, "Ran out of hardware temporaries\n");
    685 		return;
    686 	}
    687 
    688 	/* Rewrite the registers */
    689 	for (var_ptr = variables, node_index = 0; var_ptr;
    690 				var_ptr = var_ptr->Next, node_index++) {
    691 		int reg = ra_get_node_reg(graph, node_index);
    692 		unsigned int writemask = reg_get_writemask(reg);
    693 		unsigned int index = reg_get_index(reg);
    694 		struct rc_variable * var = var_ptr->Item;
    695 
    696 		if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
    697 			writemask = rc_variable_writemask_sum(var);
    698 		}
    699 
    700 		if (var->Dst.File == RC_FILE_INPUT) {
    701 			continue;
    702 		}
    703 		rc_variable_change_dst(var, index, writemask);
    704 	}
    705 
    706 	ralloc_free(graph);
    707 	ralloc_free(regs);
    708 }
    709 
    710 /**
    711  * @param user This parameter should be a pointer to an integer value.  If this
    712  * integer value is zero, then a simple register allocator will be used that
    713  * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
    714  * user is non-zero, then the regular register allocator will be used
    715  * (\sa do_regalloc).
    716   */
    717 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
    718 {
    719 	struct r300_fragment_program_compiler *c =
    720 				(struct r300_fragment_program_compiler*)cc;
    721 	struct regalloc_state s;
    722 	int * do_full_regalloc = (int*)user;
    723 
    724 	memset(&s, 0, sizeof(s));
    725 	s.C = cc;
    726 	s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
    727 	s.Input = memory_pool_malloc(&cc->Pool,
    728 			s.NumInputs * sizeof(struct register_info));
    729 	memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
    730 
    731 	s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
    732 	s.Temporary = memory_pool_malloc(&cc->Pool,
    733 			s.NumTemporaries * sizeof(struct register_info));
    734 	memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
    735 
    736 	rc_recompute_ips(s.C);
    737 
    738 	c->AllocateHwInputs(c, &alloc_input_simple, &s);
    739 	if (*do_full_regalloc) {
    740 		do_advanced_regalloc(&s);
    741 	} else {
    742 		s.Simple = 1;
    743 		do_regalloc_inputs_only(&s);
    744 	}
    745 
    746 	/* Rewrite inputs and if we are doing the simple allocation, rewrite
    747 	 * temporaries too. */
    748 	for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
    749 					inst != &s.C->Program.Instructions;
    750 					inst = inst->Next) {
    751 		rc_remap_registers(inst, &remap_register, &s);
    752 	}
    753 }
    754