Home | History | Annotate | Download | only in compiler
      1 /*
      2  * Copyright (C) 2009 Nicolai Haehnle.
      3  *
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining
      7  * a copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sublicense, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial
     16  * portions of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  */
     27 
     28 #include "radeon_program_pair.h"
     29 
     30 #include <stdio.h>
     31 
     32 #include "radeon_compiler.h"
     33 #include "radeon_compiler_util.h"
     34 #include "radeon_dataflow.h"
     35 #include "radeon_list.h"
     36 #include "radeon_variable.h"
     37 
     38 #include "util/u_debug.h"
     39 
/* Compile-time switch for the scheduler's debug output; 0 disables it. */
#define VERBOSE 0

/* printf-style debug logging to stderr.  The fprintf() call is only reached
 * when VERBOSE is non-zero; the do/while(0) wrapper makes the macro behave
 * like a single statement. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
     43 
/**
 * Scheduler bookkeeping attached to a single rc_instruction.
 */
struct schedule_instruction {
	/** The instruction described by this record. */
	struct rc_instruction * Instruction;

	/** Next instruction in the linked list of ready instructions. */
	struct schedule_instruction *NextReady;

	/** Values that this instruction reads and writes.  Only the first
	 * NumWriteValues / NumReadValues entries are walked by the commit
	 * helpers. */
	struct reg_value * WriteValues[4];
	struct reg_value * ReadValues[12];
	unsigned int NumWriteValues:3;
	unsigned int NumReadValues:4;

	/**
	 * Number of (read and write) dependencies that must be resolved before
	 * this instruction can be scheduled.  When decrease_dependencies()
	 * brings it to zero the instruction is placed on a ready list.
	 */
	unsigned int NumDependencies:5;

	/** List of all readers (see rc_get_readers() for the definition of
	 * "all readers"), even those outside the basic block this instruction
	 * lives in. */
	struct rc_reader_data GlobalReaders;

	/** If the scheduler has paired an RGB and an Alpha instruction together,
	 * PairedInst references the alpha instruction's dependency information.
	 */
	struct schedule_instruction * PairedInst;

	/** This scheduler uses the value of Score to determine which
	 * instruction to schedule.  Instructions with a higher value of Score
	 * will be scheduled first. */
	int Score;

	/** The number of components that read from a TEX instruction.
	 * Decremented by notify_sem_wait(). */
	unsigned TexReadCount;

	/** For TEX instructions a list of readers (an rc_list whose items are
	 * schedule_instruction pointers, as walked by notify_sem_wait()). */
	struct rc_list * TexReaders;
};
     83 
     84 
/**
 * Used to keep track of which instructions read a value.
 * One node of the singly linked list hanging off reg_value::Readers.
 */
struct reg_value_reader {
	/** An instruction that reads the value. */
	struct schedule_instruction *Reader;
	/** Next node in the list, or NULL at the end. */
	struct reg_value_reader *Next;
};
     92 
/**
 * Used to keep track of which values are stored in each component of a
 * RC_FILE_TEMPORARY.
 */
struct reg_value {
	/** The instruction that writes this value. */
	struct schedule_instruction * Writer;

	/**
	 * Unordered linked list of instructions that read from this value.
	 * When this value becomes available (its writer is committed), we
	 * decrease all readers' dependency count; see commit_update_writes().
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers of this value. This is decremented each time
	 * a reader of the value is committed.
	 * When the reader count reaches zero, the dependency count
	 * of the instruction writing \ref Next is decremented.
	 */
	unsigned int NumReaders;

	struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
};
    117 
/**
 * Per-register tracking: one reg_value slot per channel.  The slot index is
 * the `chan` argument of get_reg_valuep().
 */
struct register_state {
	struct reg_value * Values[4];
};
    121 
    122 struct remap_reg {
    123 	struct rc_instruciont * Inst;
    124 	unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
    125 	unsigned int OldSwizzle:3;
    126 	unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
    127 	unsigned int NewSwizzle:3;
    128 	unsigned int OnlyTexReads:1;
    129 	struct remap_reg * Next;
    130 };
    131 
/**
 * Complete state of the scheduler.
 */
struct schedule_state {
	/** Compiler context; used for error reporting, instruction insertion
	 * and pool allocation in the helpers below. */
	struct radeon_compiler * C;
	struct schedule_instruction * Current;
	/** Array of the previous writers of Current's destination register
	 * indexed by channel. */
	struct schedule_instruction * PrevWriter[4];

	/** Value tracking for every temporary register, indexed by register
	 * index; accessed through get_reg_valuep(). */
	struct register_state Temporary[RC_REGISTER_MAX_INDEX];

	/**
	 * Linked lists of instructions that can be scheduled right now,
	 * based on which ALU/TEX resources they require.
	 */
	/*@{*/
	struct schedule_instruction *ReadyFullALU;
	struct schedule_instruction *ReadyRGB;
	struct schedule_instruction *ReadyAlpha;
	struct schedule_instruction *ReadyTEX;
	/*@}*/
	/** TEX instructions emitted by emit_all_tex() that notify_sem_wait()
	 * has not processed yet; notify_sem_wait() resets this to NULL. */
	struct rc_list *PendingTEX;

	/** Scoring heuristic; calc_score_r300() and calc_score_readers() in
	 * this file match this signature. */
	void (*CalcScore)(struct schedule_instruction *);
	/* NOTE(review): the four fields below are not referenced in the part
	 * of the file visible here; confirm their meaning at the use sites. */
	long max_tex_group;
	unsigned PrevBlockHasTex:1;
	unsigned TEXCount;
	unsigned Opt:1;
};
    159 
    160 static struct reg_value ** get_reg_valuep(struct schedule_state * s,
    161 		rc_register_file file, unsigned int index, unsigned int chan)
    162 {
    163 	if (file != RC_FILE_TEMPORARY)
    164 		return 0;
    165 
    166 	if (index >= RC_REGISTER_MAX_INDEX) {
    167 		rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
    168 		return 0;
    169 	}
    170 
    171 	return &s->Temporary[index].Values[chan];
    172 }
    173 
    174 static unsigned get_tex_read_count(struct schedule_instruction * sinst)
    175 {
    176 	unsigned tex_read_count = sinst->TexReadCount;
    177 	if (sinst->PairedInst) {
    178 		tex_read_count += sinst->PairedInst->TexReadCount;
    179 	}
    180 	return tex_read_count;
    181 }
    182 
    183 #if VERBOSE
    184 static void print_list(struct schedule_instruction * sinst)
    185 {
    186 	struct schedule_instruction * ptr;
    187 	for (ptr = sinst; ptr; ptr=ptr->NextReady) {
    188 		unsigned tex_read_count = get_tex_read_count(ptr);
    189 		unsigned score = sinst->Score;
    190 		fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score,
    191 						tex_read_count);
    192 	}
    193 	fprintf(stderr, "\n");
    194 }
    195 #endif
    196 
    197 static void remove_inst_from_list(struct schedule_instruction ** list,
    198 					struct schedule_instruction * inst)
    199 {
    200 	struct schedule_instruction * prev = NULL;
    201 	struct schedule_instruction * list_ptr;
    202 	for (list_ptr = *list; list_ptr; prev = list_ptr,
    203 					list_ptr = list_ptr->NextReady) {
    204 		if (list_ptr == inst) {
    205 			if (prev) {
    206 				prev->NextReady = inst->NextReady;
    207 			} else {
    208 				*list = inst->NextReady;
    209 			}
    210 			inst->NextReady = NULL;
    211 			break;
    212 		}
    213 	}
    214 }
    215 
    216 static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
    217 {
    218 	inst->NextReady = *list;
    219 	*list = inst;
    220 }
    221 
    222 static void add_inst_to_list_score(struct schedule_instruction ** list,
    223 					struct schedule_instruction * inst)
    224 {
    225 	struct schedule_instruction * temp;
    226 	struct schedule_instruction * prev;
    227 	if (!*list) {
    228 		*list = inst;
    229 		return;
    230 	}
    231 	temp = *list;
    232 	prev = NULL;
    233 	while(temp && inst->Score <= temp->Score) {
    234 		prev = temp;
    235 		temp = temp->NextReady;
    236 	}
    237 
    238 	if (!prev) {
    239 		inst->NextReady = temp;
    240 		*list = inst;
    241 	} else {
    242 		prev->NextReady = inst;
    243 		inst->NextReady = temp;
    244 	}
    245 }
    246 
    247 static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
    248 {
    249 	DBG("%i is now ready\n", sinst->Instruction->IP);
    250 
    251 	/* Adding Ready TEX instructions to the end of the "Ready List" helps
    252 	 * us emit TEX instructions in blocks without losing our place. */
    253 	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
    254 		add_inst_to_list_score(&s->ReadyTEX, sinst);
    255 	else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
    256 		add_inst_to_list_score(&s->ReadyRGB, sinst);
    257 	else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
    258 		add_inst_to_list_score(&s->ReadyAlpha, sinst);
    259 	else
    260 		add_inst_to_list_score(&s->ReadyFullALU, sinst);
    261 }
    262 
    263 static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
    264 {
    265 	assert(sinst->NumDependencies > 0);
    266 	sinst->NumDependencies--;
    267 	if (!sinst->NumDependencies)
    268 		instruction_ready(s, sinst);
    269 }
    270 
    271 /* These functions provide different heuristics for scheduling instructions.
    272  * The default is calc_score_readers. */
    273 
    274 #if 0
    275 
    276 static void calc_score_zero(struct schedule_instruction * sinst)
    277 {
    278 	sinst->Score = 0;
    279 }
    280 
    281 static void calc_score_deps(struct schedule_instruction * sinst)
    282 {
    283 	int i;
    284 	sinst->Score = 0;
    285 	for (i = 0; i < sinst->NumWriteValues; i++) {
    286 		struct reg_value * v = sinst->WriteValues[i];
    287 		if (v->NumReaders) {
    288 			struct reg_value_reader * r;
    289 			for (r = v->Readers; r; r = r->Next) {
    290 				if (r->Reader->NumDependencies == 1) {
    291 					sinst->Score += 100;
    292 				}
    293 				sinst->Score += r->Reader->NumDependencies;
    294 			}
    295 		}
    296 	}
    297 }
    298 
    299 #endif
    300 
    301 #define NO_OUTPUT_SCORE (1 << 24)
    302 
    303 static void score_no_output(struct schedule_instruction * sinst)
    304 {
    305 	assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL);
    306 	if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
    307 			!sinst->Instruction->U.P.Alpha.OutputWriteMask) {
    308 		if (sinst->PairedInst) {
    309 			if (!sinst->PairedInst->Instruction->U.P.
    310 							RGB.OutputWriteMask
    311 					&& !sinst->PairedInst->Instruction->U.P.
    312 							Alpha.OutputWriteMask) {
    313 				sinst->Score |= NO_OUTPUT_SCORE;
    314 			}
    315 
    316 		} else {
    317 			sinst->Score |= NO_OUTPUT_SCORE;
    318 		}
    319 	}
    320 }
    321 
    322 #define PAIRED_SCORE (1 << 16)
    323 
    324 static void calc_score_r300(struct schedule_instruction * sinst)
    325 {
    326 	unsigned src_idx;
    327 
    328 	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
    329 		sinst->Score = 0;
    330 		return;
    331 	}
    332 
    333 	score_no_output(sinst);
    334 
    335 	if (sinst->PairedInst) {
    336 		sinst->Score |= PAIRED_SCORE;
    337 		return;
    338 	}
    339 
    340 	for (src_idx = 0; src_idx < 4; src_idx++) {
    341 		sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used +
    342 				sinst->Instruction->U.P.Alpha.Src[src_idx].Used;
    343 	}
    344 }
    345 
    346 #define NO_READ_TEX_SCORE (1 << 16)
    347 
    348 static void calc_score_readers(struct schedule_instruction * sinst)
    349 {
    350 	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
    351 		sinst->Score = 0;
    352 	} else {
    353 		sinst->Score = sinst->NumReadValues;
    354 		if (sinst->PairedInst) {
    355 			sinst->Score += sinst->PairedInst->NumReadValues;
    356 		}
    357 		if (get_tex_read_count(sinst) == 0) {
    358 			sinst->Score |= NO_READ_TEX_SCORE;
    359 		}
    360 		score_no_output(sinst);
    361 	}
    362 }
    363 
    364 /**
    365  * This function decreases the dependencies of the next instruction that
    366  * wants to write to each of sinst's read values.
    367  */
    368 static void commit_update_reads(struct schedule_state * s,
    369 					struct schedule_instruction * sinst){
    370 	unsigned int i;
    371 	for(i = 0; i < sinst->NumReadValues; ++i) {
    372 		struct reg_value * v = sinst->ReadValues[i];
    373 		assert(v->NumReaders > 0);
    374 		v->NumReaders--;
    375 		if (!v->NumReaders) {
    376 			if (v->Next) {
    377 				decrease_dependencies(s, v->Next->Writer);
    378 			}
    379 		}
    380 	}
    381 	if (sinst->PairedInst) {
    382 		commit_update_reads(s, sinst->PairedInst);
    383 	}
    384 }
    385 
    386 static void commit_update_writes(struct schedule_state * s,
    387 					struct schedule_instruction * sinst){
    388 	unsigned int i;
    389 	for(i = 0; i < sinst->NumWriteValues; ++i) {
    390 		struct reg_value * v = sinst->WriteValues[i];
    391 		if (v->NumReaders) {
    392 			for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
    393 				decrease_dependencies(s, r->Reader);
    394 			}
    395 		} else {
    396 			/* This happens in instruction sequences of the type
    397 			 *  OP r.x, ...;
    398 			 *  OP r.x, r.x, ...;
    399 			 * See also the subtlety in how instructions that both
    400 			 * read and write the same register are scanned.
    401 			 */
    402 			if (v->Next)
    403 				decrease_dependencies(s, v->Next->Writer);
    404 		}
    405 	}
    406 	if (sinst->PairedInst) {
    407 		commit_update_writes(s, sinst->PairedInst);
    408 	}
    409 }
    410 
    411 static void notify_sem_wait(struct schedule_state *s)
    412 {
    413 	struct rc_list * pend_ptr;
    414 	for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) {
    415 		struct rc_list * read_ptr;
    416 		struct schedule_instruction * pending = pend_ptr->Item;
    417 		for (read_ptr = pending->TexReaders; read_ptr;
    418 						read_ptr = read_ptr->Next) {
    419 			struct schedule_instruction * reader = read_ptr->Item;
    420 			reader->TexReadCount--;
    421 		}
    422 	}
    423 	s->PendingTEX = NULL;
    424 }
    425 
    426 static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
    427 {
    428 	DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
    429 
    430 	commit_update_reads(s, sinst);
    431 
    432 	commit_update_writes(s, sinst);
    433 
    434 	if (get_tex_read_count(sinst) > 0) {
    435 		sinst->Instruction->U.P.SemWait = 1;
    436 		notify_sem_wait(s);
    437 	}
    438 }
    439 
    440 /**
    441  * Emit all ready texture instructions in a single block.
    442  *
    443  * Emit as a single block to (hopefully) sample many textures in parallel,
    444  * and to avoid hardware indirections on R300.
    445  */
    446 static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
    447 {
    448 	struct schedule_instruction *readytex;
    449 	struct rc_instruction * inst_begin;
    450 
    451 	assert(s->ReadyTEX);
    452 	notify_sem_wait(s);
    453 
    454 	/* Node marker for R300 */
    455 	inst_begin = rc_insert_new_instruction(s->C, before->Prev);
    456 	inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
    457 
    458 	/* Link texture instructions back in */
    459 	readytex = s->ReadyTEX;
    460 	while(readytex) {
    461 		rc_insert_instruction(before->Prev, readytex->Instruction);
    462 		DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
    463 
    464 		/* All of the TEX instructions in the same TEX block have
    465 		 * their source registers read from before any of the
    466 		 * instructions in that block write to their destination
    467 		 * registers.  This means that when we commit a TEX
    468 		 * instruction, any other TEX instruction that wants to write
    469 		 * to one of the committed instruction's source register can be
    470 		 * marked as ready and should be emitted in the same TEX
    471 		 * block. This prevents the following sequence from being
    472 		 * emitted in two different TEX blocks:
    473 		 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
    474 		 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
    475 		 */
    476 		commit_update_reads(s, readytex);
    477 		readytex = readytex->NextReady;
    478 	}
    479 	readytex = s->ReadyTEX;
    480 	s->ReadyTEX = 0;
    481 	while(readytex){
    482 		DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
    483 		commit_update_writes(s, readytex);
    484 		/* Set semaphore bits for last TEX instruction in the block */
    485 		if (!readytex->NextReady) {
    486 			readytex->Instruction->U.I.TexSemAcquire = 1;
    487 			readytex->Instruction->U.I.TexSemWait = 1;
    488 		}
    489 		rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
    490 		readytex = readytex->NextReady;
    491 	}
    492 }
    493 
/* This is a helper function for destructive_merge_instructions().  It helps
 * merge presubtract sources from two instructions and makes sure the
 * presubtract sources end up in the correct spot.  This function assumes that
 * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
 * but no scalar instruction (alpha).
 *
 * @param dst_full instruction being merged into; modified in place, and it
 *                 may already be partially modified when 0 is returned.
 * @param src      sub-instruction (passed by value) whose presubtract
 *                 operation and operand registers are merged in.
 * @param type     RC_SOURCE_RGB or RC_SOURCE_ALPHA; selects which half of
 *                 dst_full receives the sources.
 * @return 0 if merging the presubtract sources fails.
 * @return 1 if merging the presubtract sources succeeds.
 */
static int merge_presub_sources(
	struct rc_pair_instruction * dst_full,
	struct rc_pair_sub_instruction src,
	unsigned int type)
{
	unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
	struct rc_pair_sub_instruction * dst_sub;
	const struct rc_opcode_info * info;

	assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);

	switch(type) {
	case RC_SOURCE_RGB:
		is_rgb = 1;
		is_alpha = 0;
		dst_sub = &dst_full->RGB;
		break;
	case RC_SOURCE_ALPHA:
		is_rgb = 0;
		is_alpha = 1;
		dst_sub = &dst_full->Alpha;
		break;
	default:
		assert(0);
		return 0;
	}

	/* Only the RGB half carries an operation (Alpha is NOP, see the
	 * assert above), so its opcode determines how many args are fixed up
	 * below. */
	info = rc_get_opcode_info(dst_full->RGB.Opcode);

	/* The destination already has a presubtract operation of its own. */
	if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
		return 0;

	srcp_regs = rc_presubtract_src_reg_count(
					src.Src[RC_PAIR_PRESUB_SRC].Index);
	for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
		unsigned int arg;
		int free_source;
		/* Set when the arg fix-up below must only move srcp_src ->
		 * free_source and not the other way round. */
		unsigned int one_way = 0;
		struct rc_pair_instruction_source srcp = src.Src[srcp_src];
		struct rc_pair_instruction_source temp;

		/* NOTE(review): presumably returns the slot index now holding
		 * this register, reusing a matching slot if there is one --
		 * confirm against rc_pair_alloc_source(). */
		free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
							srcp.File, srcp.Index);

		/* If free_source < 0 then there are no free source
		 * slots. */
		if (free_source < 0)
			return 0;

		/* Move the allocated source into slot srcp_src, remembering
		 * what used to live there. */
		temp = dst_sub->Src[srcp_src];
		dst_sub->Src[srcp_src] = dst_sub->Src[free_source];

		/* srcp needs src0 and src1 to be the same */
		if (free_source < srcp_src) {
			if (!temp.Used)
				continue;
			/* The displaced source needs a new slot of its own. */
			free_source = rc_pair_alloc_source(dst_full, is_rgb,
					is_alpha, temp.File, temp.Index);
			if (free_source < 0)
				return 0;
			one_way = 1;
		} else {
			/* Complete the swap of the two slots. */
			dst_sub->Src[free_source] = temp;
		}

		/* If free_source == srcp_src, then the presubtract
		 * source is already in the correct place. */
		if (free_source == srcp_src)
			continue;

		/* Shuffle the sources, so we can put the
		 * presubtract source in the correct place. */
		for(arg = 0; arg < info->NumSrcRegs; arg++) {
			/* If this arg does not read from a source of the
			 * requested type, do nothing. */
			if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
								& type)) {
				continue;
			}

			if (dst_full->RGB.Arg[arg].Source == srcp_src)
				dst_full->RGB.Arg[arg].Source = free_source;
			/* We need to do this just in case register
			 * is one of the sources already, but in the
			 * wrong spot. */
			else if(dst_full->RGB.Arg[arg].Source == free_source
							&& !one_way) {
				dst_full->RGB.Arg[arg].Source = srcp_src;
			}
		}
	}
	return 1;
}
    595 
    596 
    597 /* This function assumes that rgb.Alpha and alpha.RGB are unused */
    598 static int destructive_merge_instructions(
    599 		struct rc_pair_instruction * rgb,
    600 		struct rc_pair_instruction * alpha)
    601 {
    602 	const struct rc_opcode_info * opcode;
    603 
    604 	assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
    605 	assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
    606 
    607 	/* Presubtract registers need to be merged first so that registers
    608 	 * needed by the presubtract operation can be placed in src0 and/or
    609 	 * src1. */
    610 
    611 	/* Merge the rgb presubtract registers. */
    612 	if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
    613 		if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
    614 			return 0;
    615 		}
    616 	}
    617 	/* Merge the alpha presubtract registers */
    618 	if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
    619 		if(!merge_presub_sources(rgb,  alpha->Alpha, RC_SOURCE_ALPHA)){
    620 			return 0;
    621 		}
    622 	}
    623 
    624 	/* Copy alpha args into rgb */
    625 	opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
    626 
    627 	for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
    628 		unsigned int srcrgb = 0;
    629 		unsigned int srcalpha = 0;
    630 		unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
    631 		rc_register_file file = 0;
    632 		unsigned int index = 0;
    633 		int source;
    634 
    635 		if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
    636 			srcrgb = 1;
    637 			file = alpha->RGB.Src[oldsrc].File;
    638 			index = alpha->RGB.Src[oldsrc].Index;
    639 		} else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
    640 			srcalpha = 1;
    641 			file = alpha->Alpha.Src[oldsrc].File;
    642 			index = alpha->Alpha.Src[oldsrc].Index;
    643 		}
    644 
    645 		source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
    646 		if (source < 0)
    647 			return 0;
    648 
    649 		rgb->Alpha.Arg[arg].Source = source;
    650 		rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
    651 		rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
    652 		rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
    653 	}
    654 
    655 	/* Copy alpha opcode into rgb */
    656 	rgb->Alpha.Opcode = alpha->Alpha.Opcode;
    657 	rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
    658 	rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
    659 	rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
    660 	rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
    661 	rgb->Alpha.Saturate = alpha->Alpha.Saturate;
    662 	rgb->Alpha.Omod = alpha->Alpha.Omod;
    663 
    664 	/* Merge ALU result writing */
    665 	if (alpha->WriteALUResult) {
    666 		if (rgb->WriteALUResult)
    667 			return 0;
    668 
    669 		rgb->WriteALUResult = alpha->WriteALUResult;
    670 		rgb->ALUResultCompare = alpha->ALUResultCompare;
    671 	}
    672 
    673 	/* Copy SemWait */
    674 	rgb->SemWait |= alpha->SemWait;
    675 
    676 	return 1;
    677 }
    678 
    679 /**
    680  * Try to merge the given instructions into the rgb instructions.
    681  *
    682  * Return true on success; on failure, return false, and keep
    683  * the instructions untouched.
    684  */
    685 static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
    686 {
    687 	struct rc_pair_instruction backup;
    688 
    689 	/*Instructions can't write output registers and ALU result at the
    690 	 * same time. */
    691 	if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
    692 		|| (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
    693 		return 0;
    694 	}
    695 
    696 	/* Writing output registers in the middle of shaders is slow, so
    697 	 * we don't want to pair output writes with temp writes. */
    698 	if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask)
    699 		|| (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) {
    700 		return 0;
    701 	}
    702 
    703 	memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
    704 
    705 	if (destructive_merge_instructions(rgb, alpha))
    706 		return 1;
    707 
    708 	memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
    709 	return 0;
    710 }
    711 
    712 static void presub_nop(struct rc_instruction * emitted) {
    713 	int prev_rgb_index, prev_alpha_index, i, num_src;
    714 
    715 	/* We don't need a nop if the previous instruction is a TEX. */
    716 	if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
    717 		return;
    718 	}
    719 	if (emitted->Prev->U.P.RGB.WriteMask)
    720 		prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
    721 	else
    722 		prev_rgb_index = -1;
    723 	if (emitted->Prev->U.P.Alpha.WriteMask)
    724 		prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
    725 	else
    726 		prev_alpha_index = 1;
    727 
    728 	/* Check the previous rgb instruction */
    729 	if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
    730 		num_src = rc_presubtract_src_reg_count(
    731 				emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
    732 		for (i = 0; i < num_src; i++) {
    733 			unsigned int index = emitted->U.P.RGB.Src[i].Index;
    734 			if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
    735 			    && (index  == prev_rgb_index
    736 				|| index == prev_alpha_index)) {
    737 				emitted->Prev->U.P.Nop = 1;
    738 				return;
    739 			}
    740 		}
    741 	}
    742 
    743 	/* Check the previous alpha instruction. */
    744 	if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
    745 		return;
    746 
    747 	num_src = rc_presubtract_src_reg_count(
    748 				emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
    749 	for (i = 0; i < num_src; i++) {
    750 		unsigned int index = emitted->U.P.Alpha.Src[i].Index;
    751 		if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
    752 		   && (index == prev_rgb_index || index == prev_alpha_index)) {
    753 			emitted->Prev->U.P.Nop = 1;
    754 			return;
    755 		}
    756 	}
    757 }
    758 
    759 static void rgb_to_alpha_remap (
    760 	struct rc_instruction * inst,
    761 	struct rc_pair_instruction_arg * arg,
    762 	rc_register_file old_file,
    763 	rc_swizzle old_swz,
    764 	unsigned int new_index)
    765 {
    766 	int new_src_index;
    767 	unsigned int i;
    768 
    769 	for (i = 0; i < 3; i++) {
    770 		if (get_swz(arg->Swizzle, i) == old_swz) {
    771 			SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
    772 		}
    773 	}
    774 	new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,
    775 							old_file, new_index);
    776 	/* This conversion is not possible, we must have made a mistake in
    777 	 * is_rgb_to_alpha_possible. */
    778 	if (new_src_index < 0) {
    779 		assert(0);
    780 		return;
    781 	}
    782 
    783 	arg->Source = new_src_index;
    784 }
    785 
    786 static int can_remap(unsigned int opcode)
    787 {
    788 	switch(opcode) {
    789 	case RC_OPCODE_DDX:
    790 	case RC_OPCODE_DDY:
    791 		return 0;
    792 	default:
    793 		return 1;
    794 	}
    795 }
    796 
    797 static int can_convert_opcode_to_alpha(unsigned int opcode)
    798 {
    799 	switch(opcode) {
    800 	case RC_OPCODE_DDX:
    801 	case RC_OPCODE_DDY:
    802 	case RC_OPCODE_DP2:
    803 	case RC_OPCODE_DP3:
    804 	case RC_OPCODE_DP4:
    805 	case RC_OPCODE_DPH:
    806 		return 0;
    807 	default:
    808 		return 1;
    809 	}
    810 }
    811 
    812 static void is_rgb_to_alpha_possible(
    813 	void * userdata,
    814 	struct rc_instruction * inst,
    815 	struct rc_pair_instruction_arg * arg,
    816 	struct rc_pair_instruction_source * src)
    817 {
    818 	unsigned int read_chan = RC_SWIZZLE_UNUSED;
    819 	unsigned int alpha_sources = 0;
    820 	unsigned int i;
    821 	struct rc_reader_data * reader_data = userdata;
    822 
    823 	if (!can_remap(inst->U.P.RGB.Opcode)
    824 	    || !can_remap(inst->U.P.Alpha.Opcode)) {
    825 		reader_data->Abort = 1;
    826 		return;
    827 	}
    828 
    829 	if (!src)
    830 		return;
    831 
    832 	/* XXX There are some cases where we can still do the conversion if
    833 	 * a reader reads from a presubtract source, but for now we'll prevent
    834 	 * it. */
    835 	if (arg->Source == RC_PAIR_PRESUB_SRC) {
    836 		reader_data->Abort = 1;
    837 		return;
    838 	}
    839 
    840 	/* Make sure the source only reads the register component that we
    841 	 * are going to be convering from.  It is OK if the instruction uses
    842 	 * this component more than once.
    843 	 * XXX If the index we will be converting to is the same as the
    844 	 * current index, then it is OK to read from more than one component.
    845 	 */
    846 	for (i = 0; i < 3; i++) {
    847 		rc_swizzle swz = get_swz(arg->Swizzle, i);
    848 		switch(swz) {
    849 		case RC_SWIZZLE_X:
    850 		case RC_SWIZZLE_Y:
    851 		case RC_SWIZZLE_Z:
    852 		case RC_SWIZZLE_W:
    853 			if (read_chan == RC_SWIZZLE_UNUSED) {
    854 				read_chan = swz;
    855 			} else if (read_chan != swz) {
    856 				reader_data->Abort = 1;
    857 				return;
    858 			}
    859 			break;
    860 		default:
    861 			break;
    862 		}
    863 	}
    864 
    865 	/* Make sure there are enough alpha sources.
    866 	 * XXX If we know what register all the readers are going
    867 	 * to be remapped to, then in some situations we can still do
    868 	 * the subsitution, even if all 3 alpha sources are being used.*/
    869 	for (i = 0; i < 3; i++) {
    870 		if (inst->U.P.Alpha.Src[i].Used) {
    871 			alpha_sources++;
    872 		}
    873 	}
    874 	if (alpha_sources > 2) {
    875 		reader_data->Abort = 1;
    876 		return;
    877 	}
    878 }
    879 
    880 static int convert_rgb_to_alpha(
    881 	struct schedule_state * s,
    882 	struct schedule_instruction * sched_inst)
    883 {
    884 	struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
    885 	unsigned int old_mask = pair_inst->RGB.WriteMask;
    886 	unsigned int old_swz = rc_mask_to_swizzle(old_mask);
    887 	const struct rc_opcode_info * info =
    888 				rc_get_opcode_info(pair_inst->RGB.Opcode);
    889 	int new_index = -1;
    890 	unsigned int i;
    891 
    892 	if (sched_inst->GlobalReaders.Abort)
    893 		return 0;
    894 
    895 	if (!pair_inst->RGB.WriteMask)
    896 		return 0;
    897 
    898 	if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
    899 	    || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
    900 		return 0;
    901 	}
    902 
    903 	assert(sched_inst->NumWriteValues == 1);
    904 
    905 	if (!sched_inst->WriteValues[0]) {
    906 		assert(0);
    907 		return 0;
    908 	}
    909 
    910 	/* We start at the old index, because if we can reuse the same
    911 	 * register and just change the swizzle then it is more likely we
    912 	 * will be able to convert all the readers. */
    913 	for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
    914 		struct reg_value ** new_regvalp = get_reg_valuep(
    915 						s, RC_FILE_TEMPORARY, i, 3);
    916 		if (!*new_regvalp) {
    917 			struct reg_value ** old_regvalp =
    918 				get_reg_valuep(s,
    919 					RC_FILE_TEMPORARY,
    920 					pair_inst->RGB.DestIndex,
    921 					rc_mask_to_swizzle(old_mask));
    922 			new_index = i;
    923 			*new_regvalp = *old_regvalp;
    924 			*old_regvalp = NULL;
    925 			new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
    926 			break;
    927 		}
    928 	}
    929 	if (new_index < 0) {
    930 		return 0;
    931 	}
    932 
    933 	/* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
    934 	 * as the RGB opcode, then the Alpha instruction will already contain
    935 	 * the correct opcode and instruction args, so we do not want to
    936 	 * overwrite them.
    937 	 */
    938 	if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
    939 		pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
    940 		memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
    941 						sizeof(pair_inst->Alpha.Arg));
    942 	}
    943 	pair_inst->Alpha.DestIndex = new_index;
    944 	pair_inst->Alpha.WriteMask = RC_MASK_W;
    945 	pair_inst->Alpha.Target = pair_inst->RGB.Target;
    946 	pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
    947 	pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
    948 	pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
    949 	pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
    950 	/* Move the swizzles into the first chan */
    951 	for (i = 0; i < info->NumSrcRegs; i++) {
    952 		unsigned int j;
    953 		for (j = 0; j < 3; j++) {
    954 			unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
    955 			if (swz != RC_SWIZZLE_UNUSED) {
    956 				pair_inst->Alpha.Arg[i].Swizzle =
    957 							rc_init_swizzle(swz, 1);
    958 				break;
    959 			}
    960 		}
    961 	}
    962 	pair_inst->RGB.Opcode = RC_OPCODE_NOP;
    963 	pair_inst->RGB.DestIndex = 0;
    964 	pair_inst->RGB.WriteMask = 0;
    965 	pair_inst->RGB.Target = 0;
    966 	pair_inst->RGB.OutputWriteMask = 0;
    967 	pair_inst->RGB.DepthWriteMask = 0;
    968 	pair_inst->RGB.Saturate = 0;
    969 	memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
    970 
    971 	for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
    972 		struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
    973 		rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg,
    974 					RC_FILE_TEMPORARY, old_swz, new_index);
    975 	}
    976 	return 1;
    977 }
    978 
    979 static void try_convert_and_pair(
    980 	struct schedule_state *s,
    981 	struct schedule_instruction ** inst_list)
    982 {
    983 	struct schedule_instruction * list_ptr = *inst_list;
    984 	while (list_ptr && *inst_list && (*inst_list)->NextReady) {
    985 		int paired = 0;
    986 		if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP
    987 			&& list_ptr->Instruction->U.P.RGB.Opcode
    988 						!= RC_OPCODE_REPL_ALPHA) {
    989 				goto next;
    990 		}
    991 		if (list_ptr->NumWriteValues == 1
    992 					&& convert_rgb_to_alpha(s, list_ptr)) {
    993 
    994 			struct schedule_instruction * pair_ptr;
    995 			remove_inst_from_list(inst_list, list_ptr);
    996 			add_inst_to_list_score(&s->ReadyAlpha, list_ptr);
    997 
    998 			for (pair_ptr = s->ReadyRGB; pair_ptr;
    999 					pair_ptr = pair_ptr->NextReady) {
   1000 				if (merge_instructions(&pair_ptr->Instruction->U.P,
   1001 						&list_ptr->Instruction->U.P)) {
   1002 					remove_inst_from_list(&s->ReadyAlpha, list_ptr);
   1003 					remove_inst_from_list(&s->ReadyRGB, pair_ptr);
   1004 					pair_ptr->PairedInst = list_ptr;
   1005 
   1006 					add_inst_to_list(&s->ReadyFullALU, pair_ptr);
   1007 					list_ptr = *inst_list;
   1008 					paired = 1;
   1009 					break;
   1010 				}
   1011 
   1012 			}
   1013 		}
   1014 		if (!paired) {
   1015 next:
   1016 			list_ptr = list_ptr->NextReady;
   1017 		}
   1018 	}
   1019 }
   1020 
   1021 /**
   1022  * This function attempts to merge RGB and Alpha instructions together.
   1023  */
   1024 static void pair_instructions(struct schedule_state * s)
   1025 {
   1026 	struct schedule_instruction *rgb_ptr;
   1027 	struct schedule_instruction *alpha_ptr;
   1028 
   1029 	/* Some pairings might fail because they require too
   1030 	 * many source slots; try all possible pairings if necessary */
   1031 	rgb_ptr = s->ReadyRGB;
   1032 	while(rgb_ptr) {
   1033 		struct schedule_instruction * rgb_next = rgb_ptr->NextReady;
   1034 		alpha_ptr = s->ReadyAlpha;
   1035 		while(alpha_ptr) {
   1036 			struct schedule_instruction * alpha_next = alpha_ptr->NextReady;
   1037 			if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) {
   1038 				/* Remove RGB and Alpha from their ready lists.
   1039 				 */
   1040 				remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
   1041 				remove_inst_from_list(&s->ReadyAlpha, alpha_ptr);
   1042 				rgb_ptr->PairedInst = alpha_ptr;
   1043 				add_inst_to_list(&s->ReadyFullALU, rgb_ptr);
   1044 				break;
   1045 			}
   1046 			alpha_ptr = alpha_next;
   1047 		}
   1048 		rgb_ptr = rgb_next;
   1049 	}
   1050 
   1051 	if (!s->Opt) {
   1052 		return;
   1053 	}
   1054 
   1055 	/* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
   1056 	 * slot can be converted into Alpha instructions. */
   1057 	try_convert_and_pair(s, &s->ReadyFullALU);
   1058 
   1059 	/* Try to convert some of the RGB instructions to Alpha and
   1060 	 * try to pair it with another RGB. */
   1061 	try_convert_and_pair(s, &s->ReadyRGB);
   1062 }
   1063 
   1064 static void update_max_score(
   1065 	struct schedule_state * s,
   1066 	struct schedule_instruction ** list,
   1067 	int * max_score,
   1068 	struct schedule_instruction ** max_inst_out,
   1069 	struct schedule_instruction *** list_out)
   1070 {
   1071 	struct schedule_instruction * list_ptr;
   1072 	for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) {
   1073 		int score;
   1074 		s->CalcScore(list_ptr);
   1075 		score = list_ptr->Score;
   1076 		if (!*max_inst_out || score > *max_score) {
   1077 			*max_score = score;
   1078 			*max_inst_out = list_ptr;
   1079 			*list_out = list;
   1080 		}
   1081 	}
   1082 }
   1083 
   1084 static void emit_instruction(
   1085 	struct schedule_state * s,
   1086 	struct rc_instruction * before)
   1087 {
   1088 	int max_score = -1;
   1089 	struct schedule_instruction * max_inst = NULL;
   1090 	struct schedule_instruction ** max_list = NULL;
   1091 	unsigned tex_count = 0;
   1092 	struct schedule_instruction * tex_ptr;
   1093 
   1094 	pair_instructions(s);
   1095 #if VERBOSE
   1096 	fprintf(stderr, "Full:\n");
   1097 	print_list(s->ReadyFullALU);
   1098 	fprintf(stderr, "RGB:\n");
   1099 	print_list(s->ReadyRGB);
   1100 	fprintf(stderr, "Alpha:\n");
   1101 	print_list(s->ReadyAlpha);
   1102 	fprintf(stderr, "TEX:\n");
   1103 	print_list(s->ReadyTEX);
   1104 #endif
   1105 
   1106 	for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
   1107 		if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) {
   1108 			emit_all_tex(s, before);
   1109 			return;
   1110 		}
   1111 		tex_count++;
   1112 	}
   1113 	update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
   1114 	update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
   1115 	update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
   1116 
   1117 	if (tex_count >= s->max_tex_group || max_score == -1
   1118 		|| (s->TEXCount > 0 && tex_count == s->TEXCount)
   1119 		|| (!s->C->is_r500 && tex_count > 0 && max_score == -1)) {
   1120 		emit_all_tex(s, before);
   1121 	} else {
   1122 
   1123 
   1124 		remove_inst_from_list(max_list, max_inst);
   1125 		rc_insert_instruction(before->Prev, max_inst->Instruction);
   1126 		commit_alu_instruction(s, max_inst);
   1127 
   1128 		presub_nop(before->Prev);
   1129 	}
   1130 }
   1131 
   1132 static void add_tex_reader(
   1133 	struct schedule_state * s,
   1134 	struct schedule_instruction * writer,
   1135 	struct schedule_instruction * reader)
   1136 {
   1137 	if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) {
   1138 		/*Not a TEX instructions */
   1139 		return;
   1140 	}
   1141 	reader->TexReadCount++;
   1142 	rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader));
   1143 }
   1144 
   1145 static void scan_read(void * data, struct rc_instruction * inst,
   1146 		rc_register_file file, unsigned int index, unsigned int chan)
   1147 {
   1148 	struct schedule_state * s = data;
   1149 	struct reg_value ** v = get_reg_valuep(s, file, index, chan);
   1150 	struct reg_value_reader * reader;
   1151 
   1152 	if (!v)
   1153 		return;
   1154 
   1155 	if (*v && (*v)->Writer == s->Current) {
   1156 		/* The instruction reads and writes to a register component.
   1157 		 * In this case, we only want to increment dependencies by one.
   1158 		 * Why?
   1159 		 * Because each instruction depends on the writers of its source
   1160 		 * registers _and_ the most recent writer of its destination
   1161 		 * register.  In this case, the current instruction (s->Current)
   1162 		 * has a dependency that both writes to one of its source
   1163 		 * registers and was the most recent writer to its destination
   1164 		 * register.  We have already marked this dependency in
   1165 		 * scan_write(), so we don't need to do it again.
   1166 		 */
   1167 
   1168 		/* We need to make sure we are adding s->Current to the
   1169 		 * previous writer's list of TexReaders, if the previous writer
   1170 		 * was a TEX instruction.
   1171 		 */
   1172 		add_tex_reader(s, s->PrevWriter[chan], s->Current);
   1173 
   1174 		return;
   1175 	}
   1176 
   1177 	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
   1178 
   1179 	reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
   1180 	reader->Reader = s->Current;
   1181 	if (!*v) {
   1182 		/* In this situation, the instruction reads from a register
   1183 		 * that hasn't been written to or read from in the current
   1184 		 * block. */
   1185 		*v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
   1186 		memset(*v, 0, sizeof(struct reg_value));
   1187 		(*v)->Readers = reader;
   1188 	} else {
   1189 		reader->Next = (*v)->Readers;
   1190 		(*v)->Readers = reader;
   1191 		/* Only update the current instruction's dependencies if the
   1192 		 * register it reads from has been written to in this block. */
   1193 		if ((*v)->Writer) {
   1194 			add_tex_reader(s, (*v)->Writer, s->Current);
   1195 			s->Current->NumDependencies++;
   1196 		}
   1197 	}
   1198 	(*v)->NumReaders++;
   1199 
   1200 	if (s->Current->NumReadValues >= 12) {
   1201 		rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
   1202 	} else {
   1203 		s->Current->ReadValues[s->Current->NumReadValues++] = *v;
   1204 	}
   1205 }
   1206 
   1207 static void scan_write(void * data, struct rc_instruction * inst,
   1208 		rc_register_file file, unsigned int index, unsigned int chan)
   1209 {
   1210 	struct schedule_state * s = data;
   1211 	struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
   1212 	struct reg_value * newv;
   1213 
   1214 	if (!pv)
   1215 		return;
   1216 
   1217 	DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
   1218 
   1219 	newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
   1220 	memset(newv, 0, sizeof(*newv));
   1221 
   1222 	newv->Writer = s->Current;
   1223 
   1224 	if (*pv) {
   1225 		(*pv)->Next = newv;
   1226 		s->Current->NumDependencies++;
   1227 		/* Keep track of the previous writer to s->Current's destination
   1228 		 * register */
   1229 		s->PrevWriter[chan] = (*pv)->Writer;
   1230 	}
   1231 
   1232 	*pv = newv;
   1233 
   1234 	if (s->Current->NumWriteValues >= 4) {
   1235 		rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
   1236 	} else {
   1237 		s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
   1238 	}
   1239 }
   1240 
   1241 static void is_rgb_to_alpha_possible_normal(
   1242 	void * userdata,
   1243 	struct rc_instruction * inst,
   1244 	struct rc_src_register * src)
   1245 {
   1246 	struct rc_reader_data * reader_data = userdata;
   1247 	reader_data->Abort = 1;
   1248 
   1249 }
   1250 
   1251 static void schedule_block(struct schedule_state * s,
   1252 		struct rc_instruction * begin, struct rc_instruction * end)
   1253 {
   1254 	unsigned int ip;
   1255 
   1256 	/* Scan instructions for data dependencies */
   1257 	ip = 0;
   1258 	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
   1259 		s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
   1260 		memset(s->Current, 0, sizeof(struct schedule_instruction));
   1261 
   1262 		if (inst->Type == RC_INSTRUCTION_NORMAL) {
   1263 			const struct rc_opcode_info * info =
   1264 					rc_get_opcode_info(inst->U.I.Opcode);
   1265 			if (info->HasTexture) {
   1266 				s->TEXCount++;
   1267 			}
   1268 		}
   1269 
   1270 		/* XXX: This causes SemWait to be set for all instructions in
   1271 		 * a block if the previous block contained a TEX instruction.
   1272 		 * We can do better here, but it will take a lot of work. */
   1273 		if (s->PrevBlockHasTex) {
   1274 			s->Current->TexReadCount = 1;
   1275 		}
   1276 
   1277 		s->Current->Instruction = inst;
   1278 		inst->IP = ip++;
   1279 
   1280 		DBG("%i: Scanning\n", inst->IP);
   1281 
   1282 		/* The order of things here is subtle and maybe slightly
   1283 		 * counter-intuitive, to account for the case where an
   1284 		 * instruction writes to the same register as it reads
   1285 		 * from. */
   1286 		rc_for_all_writes_chan(inst, &scan_write, s);
   1287 		rc_for_all_reads_chan(inst, &scan_read, s);
   1288 
   1289 		DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
   1290 
   1291 		if (!s->Current->NumDependencies) {
   1292 			instruction_ready(s, s->Current);
   1293 		}
   1294 
   1295 		/* Get global readers for possible RGB->Alpha conversion. */
   1296 		s->Current->GlobalReaders.ExitOnAbort = 1;
   1297 		rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
   1298 				is_rgb_to_alpha_possible_normal,
   1299 				is_rgb_to_alpha_possible, NULL);
   1300 	}
   1301 
   1302 	/* Temporarily unlink all instructions */
   1303 	begin->Prev->Next = end;
   1304 	end->Prev = begin->Prev;
   1305 
   1306 	/* Schedule instructions back */
   1307 	while(!s->C->Error &&
   1308 	      (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
   1309 		emit_instruction(s, end);
   1310 	}
   1311 }
   1312 
   1313 static int is_controlflow(struct rc_instruction * inst)
   1314 {
   1315 	if (inst->Type == RC_INSTRUCTION_NORMAL) {
   1316 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
   1317 		return opcode->IsFlowControl;
   1318 	}
   1319 	return 0;
   1320 }
   1321 
   1322 void rc_pair_schedule(struct radeon_compiler *cc, void *user)
   1323 {
   1324 	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
   1325 	struct schedule_state s;
   1326 	struct rc_instruction * inst = c->Base.Program.Instructions.Next;
   1327 	unsigned int * opt = user;
   1328 
   1329 	memset(&s, 0, sizeof(s));
   1330 	s.Opt = *opt;
   1331 	s.C = &c->Base;
   1332 	if (s.C->is_r500) {
   1333 		s.CalcScore = calc_score_readers;
   1334 	} else {
   1335 		s.CalcScore = calc_score_r300;
   1336 	}
   1337 	s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
   1338 	while(inst != &c->Base.Program.Instructions) {
   1339 		struct rc_instruction * first;
   1340 
   1341 		if (is_controlflow(inst)) {
   1342 			inst = inst->Next;
   1343 			continue;
   1344 		}
   1345 
   1346 		first = inst;
   1347 
   1348 		while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
   1349 			inst = inst->Next;
   1350 
   1351 		DBG("Schedule one block\n");
   1352 		memset(s.Temporary, 0, sizeof(s.Temporary));
   1353 		s.TEXCount = 0;
   1354 		schedule_block(&s, first, inst);
   1355 		if (s.PendingTEX) {
   1356 			s.PrevBlockHasTex = 1;
   1357 		}
   1358 	}
   1359 }
   1360