Home | History | Annotate | Download | only in compiler
      1 /*
      2  * Copyright (C) 2009 Nicolai Haehnle.
      3  * Copyright 2010 Tom Stellard <tstellar (at) gmail.com>
      4  *
      5  * All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining
      8  * a copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sublicense, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * The above copyright notice and this permission notice (including the
     16  * next paragraph) shall be included in all copies or substantial
     17  * portions of the Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     26  *
     27  */
     28 
     29 #include "radeon_dataflow.h"
     30 
     31 #include "radeon_compiler.h"
     32 #include "radeon_compiler_util.h"
     33 #include "radeon_list.h"
     34 #include "radeon_swizzle.h"
     35 #include "radeon_variable.h"
     36 
     37 struct src_clobbered_reads_cb_data {
     38 	rc_register_file File;
     39 	unsigned int Index;
     40 	unsigned int Mask;
     41 	struct rc_reader_data * ReaderData;
     42 };
     43 
     44 typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
     45 						struct rc_instruction *,
     46 						unsigned int);
     47 
     48 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
     49 {
     50 	struct rc_src_register combine;
     51 	combine.File = inner.File;
     52 	combine.Index = inner.Index;
     53 	combine.RelAddr = inner.RelAddr;
     54 	if (outer.Abs) {
     55 		combine.Abs = 1;
     56 		combine.Negate = outer.Negate;
     57 	} else {
     58 		combine.Abs = inner.Abs;
     59 		combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
     60 		combine.Negate ^= outer.Negate;
     61 	}
     62 	combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
     63 	return combine;
     64 }
     65 
     66 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
     67 						struct rc_src_register * src)
     68 {
     69 	rc_register_file file = src->File;
     70 	struct rc_reader_data * reader_data = data;
     71 
     72 	if(!rc_inst_can_use_presub(inst,
     73 				reader_data->Writer->U.I.PreSub.Opcode,
     74 				rc_swizzle_to_writemask(src->Swizzle),
     75 				src,
     76 				&reader_data->Writer->U.I.PreSub.SrcReg[0],
     77 				&reader_data->Writer->U.I.PreSub.SrcReg[1])) {
     78 		reader_data->Abort = 1;
     79 		return;
     80 	}
     81 
     82 	/* XXX This could probably be handled better. */
     83 	if (file == RC_FILE_ADDRESS) {
     84 		reader_data->Abort = 1;
     85 		return;
     86 	}
     87 
     88 	/* These instructions cannot read from the constants file.
     89 	 * see radeonTransformTEX()
     90 	 */
     91 	if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
     92 			reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
     93 				(inst->U.I.Opcode == RC_OPCODE_TEX ||
     94 				inst->U.I.Opcode == RC_OPCODE_TXB ||
     95 				inst->U.I.Opcode == RC_OPCODE_TXP ||
     96 				inst->U.I.Opcode == RC_OPCODE_TXD ||
     97 				inst->U.I.Opcode == RC_OPCODE_TXL ||
     98 				inst->U.I.Opcode == RC_OPCODE_KIL)){
     99 		reader_data->Abort = 1;
    100 		return;
    101 	}
    102 }
    103 
    104 static void src_clobbered_reads_cb(
    105 	void * data,
    106 	struct rc_instruction * inst,
    107 	struct rc_src_register * src)
    108 {
    109 	struct src_clobbered_reads_cb_data * sc_data = data;
    110 
    111 	if (src->File == sc_data->File
    112 	    && src->Index == sc_data->Index
    113 	    && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
    114 
    115 		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
    116 	}
    117 
    118 	if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
    119 		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
    120 	}
    121 }
    122 
    123 static void is_src_clobbered_scan_write(
    124 	void * data,
    125 	struct rc_instruction * inst,
    126 	rc_register_file file,
    127 	unsigned int index,
    128 	unsigned int mask)
    129 {
    130 	struct src_clobbered_reads_cb_data sc_data;
    131 	struct rc_reader_data * reader_data = data;
    132 	sc_data.File = file;
    133 	sc_data.Index = index;
    134 	sc_data.Mask = mask;
    135 	sc_data.ReaderData = reader_data;
    136 	rc_for_all_reads_src(reader_data->Writer,
    137 					src_clobbered_reads_cb, &sc_data);
    138 }
    139 
    140 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
    141 {
    142 	struct rc_reader_data reader_data;
    143 	unsigned int i;
    144 
    145 	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
    146 	    inst_mov->U.I.WriteALUResult ||
    147 	    inst_mov->U.I.SaturateMode)
    148 		return;
    149 
    150 	/* Get a list of all the readers of this MOV instruction. */
    151 	reader_data.ExitOnAbort = 1;
    152 	rc_get_readers(c, inst_mov, &reader_data,
    153 		       copy_propagate_scan_read, NULL,
    154 		       is_src_clobbered_scan_write);
    155 
    156 	if (reader_data.Abort || reader_data.ReaderCount == 0)
    157 		return;
    158 
    159 	/* Propagate the MOV instruction. */
    160 	for (i = 0; i < reader_data.ReaderCount; i++) {
    161 		struct rc_instruction * inst = reader_data.Readers[i].Inst;
    162 		*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
    163 
    164 		if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
    165 			inst->U.I.PreSub = inst_mov->U.I.PreSub;
    166 	}
    167 
    168 	/* Finally, remove the original MOV instruction */
    169 	rc_remove_instruction(inst_mov);
    170 }
    171 
    172 /**
    173  * Check if a source register is actually always the same
    174  * swizzle constant.
    175  */
    176 static int is_src_uniform_constant(struct rc_src_register src,
    177 		rc_swizzle * pswz, unsigned int * pnegate)
    178 {
    179 	int have_used = 0;
    180 
    181 	if (src.File != RC_FILE_NONE) {
    182 		*pswz = 0;
    183 		return 0;
    184 	}
    185 
    186 	for(unsigned int chan = 0; chan < 4; ++chan) {
    187 		unsigned int swz = GET_SWZ(src.Swizzle, chan);
    188 		if (swz < 4) {
    189 			*pswz = 0;
    190 			return 0;
    191 		}
    192 		if (swz == RC_SWIZZLE_UNUSED)
    193 			continue;
    194 
    195 		if (!have_used) {
    196 			*pswz = swz;
    197 			*pnegate = GET_BIT(src.Negate, chan);
    198 			have_used = 1;
    199 		} else {
    200 			if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
    201 				*pswz = 0;
    202 				return 0;
    203 			}
    204 		}
    205 	}
    206 
    207 	return 1;
    208 }
    209 
    210 static void constant_folding_mad(struct rc_instruction * inst)
    211 {
    212 	rc_swizzle swz = 0;
    213 	unsigned int negate= 0;
    214 
    215 	if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
    216 		if (swz == RC_SWIZZLE_ZERO) {
    217 			inst->U.I.Opcode = RC_OPCODE_MUL;
    218 			return;
    219 		}
    220 	}
    221 
    222 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
    223 		if (swz == RC_SWIZZLE_ONE) {
    224 			inst->U.I.Opcode = RC_OPCODE_ADD;
    225 			if (negate)
    226 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
    227 			inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
    228 			return;
    229 		} else if (swz == RC_SWIZZLE_ZERO) {
    230 			inst->U.I.Opcode = RC_OPCODE_MOV;
    231 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
    232 			return;
    233 		}
    234 	}
    235 
    236 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
    237 		if (swz == RC_SWIZZLE_ONE) {
    238 			inst->U.I.Opcode = RC_OPCODE_ADD;
    239 			if (negate)
    240 				inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
    241 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
    242 			return;
    243 		} else if (swz == RC_SWIZZLE_ZERO) {
    244 			inst->U.I.Opcode = RC_OPCODE_MOV;
    245 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
    246 			return;
    247 		}
    248 	}
    249 }
    250 
    251 static void constant_folding_mul(struct rc_instruction * inst)
    252 {
    253 	rc_swizzle swz = 0;
    254 	unsigned int negate = 0;
    255 
    256 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
    257 		if (swz == RC_SWIZZLE_ONE) {
    258 			inst->U.I.Opcode = RC_OPCODE_MOV;
    259 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
    260 			if (negate)
    261 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
    262 			return;
    263 		} else if (swz == RC_SWIZZLE_ZERO) {
    264 			inst->U.I.Opcode = RC_OPCODE_MOV;
    265 			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
    266 			return;
    267 		}
    268 	}
    269 
    270 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
    271 		if (swz == RC_SWIZZLE_ONE) {
    272 			inst->U.I.Opcode = RC_OPCODE_MOV;
    273 			if (negate)
    274 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
    275 			return;
    276 		} else if (swz == RC_SWIZZLE_ZERO) {
    277 			inst->U.I.Opcode = RC_OPCODE_MOV;
    278 			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
    279 			return;
    280 		}
    281 	}
    282 }
    283 
    284 static void constant_folding_add(struct rc_instruction * inst)
    285 {
    286 	rc_swizzle swz = 0;
    287 	unsigned int negate = 0;
    288 
    289 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
    290 		if (swz == RC_SWIZZLE_ZERO) {
    291 			inst->U.I.Opcode = RC_OPCODE_MOV;
    292 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
    293 			return;
    294 		}
    295 	}
    296 
    297 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
    298 		if (swz == RC_SWIZZLE_ZERO) {
    299 			inst->U.I.Opcode = RC_OPCODE_MOV;
    300 			return;
    301 		}
    302 	}
    303 }
    304 
    305 /**
    306  * Replace 0.0, 1.0 and 0.5 immediate constants by their
    307  * respective swizzles. Simplify instructions like ADD dst, src, 0;
    308  */
    309 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
    310 {
    311 	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
    312 	unsigned int i;
    313 
    314 	/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
    315 	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
    316 		struct rc_constant * constant;
    317 		struct rc_src_register newsrc;
    318 		int have_real_reference;
    319 		unsigned int chan;
    320 
    321 		/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
    322 		for (chan = 0; chan < 4; ++chan)
    323 			if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
    324 				break;
    325 		if (chan == 4) {
    326 			inst->U.I.SrcReg[src].File = RC_FILE_NONE;
    327 			continue;
    328 		}
    329 
    330 		/* Convert immediates to swizzles. */
    331 		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
    332 		    inst->U.I.SrcReg[src].RelAddr ||
    333 		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
    334 			continue;
    335 
    336 		constant =
    337 			&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
    338 
    339 		if (constant->Type != RC_CONSTANT_IMMEDIATE)
    340 			continue;
    341 
    342 		newsrc = inst->U.I.SrcReg[src];
    343 		have_real_reference = 0;
    344 		for (chan = 0; chan < 4; ++chan) {
    345 			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
    346 			unsigned int newswz;
    347 			float imm;
    348 			float baseimm;
    349 
    350 			if (swz >= 4)
    351 				continue;
    352 
    353 			imm = constant->u.Immediate[swz];
    354 			baseimm = imm;
    355 			if (imm < 0.0)
    356 				baseimm = -baseimm;
    357 
    358 			if (baseimm == 0.0) {
    359 				newswz = RC_SWIZZLE_ZERO;
    360 			} else if (baseimm == 1.0) {
    361 				newswz = RC_SWIZZLE_ONE;
    362 			} else if (baseimm == 0.5 && c->has_half_swizzles) {
    363 				newswz = RC_SWIZZLE_HALF;
    364 			} else {
    365 				have_real_reference = 1;
    366 				continue;
    367 			}
    368 
    369 			SET_SWZ(newsrc.Swizzle, chan, newswz);
    370 			if (imm < 0.0 && !newsrc.Abs)
    371 				newsrc.Negate ^= 1 << chan;
    372 		}
    373 
    374 		if (!have_real_reference) {
    375 			newsrc.File = RC_FILE_NONE;
    376 			newsrc.Index = 0;
    377 		}
    378 
    379 		/* don't make the swizzle worse */
    380 		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
    381 		    c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
    382 			continue;
    383 
    384 		inst->U.I.SrcReg[src] = newsrc;
    385 	}
    386 
    387 	/* Simplify instructions based on constants */
    388 	if (inst->U.I.Opcode == RC_OPCODE_MAD)
    389 		constant_folding_mad(inst);
    390 
    391 	/* note: MAD can simplify to MUL or ADD */
    392 	if (inst->U.I.Opcode == RC_OPCODE_MUL)
    393 		constant_folding_mul(inst);
    394 	else if (inst->U.I.Opcode == RC_OPCODE_ADD)
    395 		constant_folding_add(inst);
    396 
    397 	/* In case this instruction has been converted, make sure all of the
    398 	 * registers that are no longer used are empty. */
    399 	opcode = rc_get_opcode_info(inst->U.I.Opcode);
    400 	for(i = opcode->NumSrcRegs; i < 3; i++) {
    401 		memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
    402 	}
    403 }
    404 
    405 /**
    406  * If src and dst use the same register, this function returns a writemask that
    407  * indicates wich components are read by src.  Otherwise zero is returned.
    408  */
    409 static unsigned int src_reads_dst_mask(struct rc_src_register src,
    410 						struct rc_dst_register dst)
    411 {
    412 	if (dst.File != src.File || dst.Index != src.Index) {
    413 		return 0;
    414 	}
    415 	return rc_swizzle_to_writemask(src.Swizzle);
    416 }
    417 
    418 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
    419  * in any of its channels.  Return 0 otherwise. */
    420 static int src_has_const_swz(struct rc_src_register src) {
    421 	int chan;
    422 	for(chan = 0; chan < 4; chan++) {
    423 		unsigned int swz = GET_SWZ(src.Swizzle, chan);
    424 		if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
    425 						|| swz == RC_SWIZZLE_ONE) {
    426 			return 1;
    427 		}
    428 	}
    429 	return 0;
    430 }
    431 
    432 static void presub_scan_read(
    433 	void * data,
    434 	struct rc_instruction * inst,
    435 	struct rc_src_register * src)
    436 {
    437 	struct rc_reader_data * reader_data = data;
    438 	rc_presubtract_op * presub_opcode = reader_data->CbData;
    439 
    440 	if (!rc_inst_can_use_presub(inst, *presub_opcode,
    441 			reader_data->Writer->U.I.DstReg.WriteMask,
    442 			src,
    443 			&reader_data->Writer->U.I.SrcReg[0],
    444 			&reader_data->Writer->U.I.SrcReg[1])) {
    445 		reader_data->Abort = 1;
    446 		return;
    447 	}
    448 }
    449 
    450 static int presub_helper(
    451 	struct radeon_compiler * c,
    452 	struct rc_instruction * inst_add,
    453 	rc_presubtract_op presub_opcode,
    454 	rc_presub_replace_fn presub_replace)
    455 {
    456 	struct rc_reader_data reader_data;
    457 	unsigned int i;
    458 	rc_presubtract_op cb_op = presub_opcode;
    459 
    460 	reader_data.CbData = &cb_op;
    461 	reader_data.ExitOnAbort = 1;
    462 	rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
    463 						is_src_clobbered_scan_write);
    464 
    465 	if (reader_data.Abort || reader_data.ReaderCount == 0)
    466 		return 0;
    467 
    468 	for(i = 0; i < reader_data.ReaderCount; i++) {
    469 		unsigned int src_index;
    470 		struct rc_reader reader = reader_data.Readers[i];
    471 		const struct rc_opcode_info * info =
    472 				rc_get_opcode_info(reader.Inst->U.I.Opcode);
    473 
    474 		for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
    475 			if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
    476 				presub_replace(inst_add, reader.Inst, src_index);
    477 		}
    478 	}
    479 	return 1;
    480 }
    481 
    482 /* This function assumes that inst_add->U.I.SrcReg[0] and
    483  * inst_add->U.I.SrcReg[1] aren't both negative. */
    484 static void presub_replace_add(
    485 	struct rc_instruction * inst_add,
    486 	struct rc_instruction * inst_reader,
    487 	unsigned int src_index)
    488 {
    489 	rc_presubtract_op presub_opcode;
    490 	if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
    491 		presub_opcode = RC_PRESUB_SUB;
    492 	else
    493 		presub_opcode = RC_PRESUB_ADD;
    494 
    495 	if (inst_add->U.I.SrcReg[1].Negate) {
    496 		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
    497 		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
    498 	} else {
    499 		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
    500 		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
    501 	}
    502 	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
    503 	inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
    504 	inst_reader->U.I.PreSub.Opcode = presub_opcode;
    505 	inst_reader->U.I.SrcReg[src_index] =
    506 			chain_srcregs(inst_reader->U.I.SrcReg[src_index],
    507 					inst_reader->U.I.PreSub.SrcReg[0]);
    508 	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
    509 	inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
    510 }
    511 
    512 static int is_presub_candidate(
    513 	struct radeon_compiler * c,
    514 	struct rc_instruction * inst)
    515 {
    516 	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
    517 	unsigned int i;
    518 	unsigned int is_constant[2] = {0, 0};
    519 
    520 	assert(inst->U.I.Opcode == RC_OPCODE_ADD);
    521 
    522 	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
    523 			|| inst->U.I.SaturateMode
    524 			|| inst->U.I.WriteALUResult
    525 			|| inst->U.I.Omod) {
    526 		return 0;
    527 	}
    528 
    529 	/* If both sources use a constant swizzle, then we can't convert it to
    530 	 * a presubtract operation.  In fact for the ADD and SUB presubtract
    531 	 * operations neither source can contain a constant swizzle.  This
    532 	 * specific case is checked in peephole_add_presub_add() when
    533 	 * we make sure the swizzles for both sources are equal, so we
    534 	 * don't need to worry about it here. */
    535 	for (i = 0; i < 2; i++) {
    536 		int chan;
    537 		for (chan = 0; chan < 4; chan++) {
    538 			rc_swizzle swz =
    539 				get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
    540 			if (swz == RC_SWIZZLE_ONE
    541 					|| swz == RC_SWIZZLE_ZERO
    542 					|| swz == RC_SWIZZLE_HALF) {
    543 				is_constant[i] = 1;
    544 			}
    545 		}
    546 	}
    547 	if (is_constant[0] && is_constant[1])
    548 		return 0;
    549 
    550 	for(i = 0; i < info->NumSrcRegs; i++) {
    551 		struct rc_src_register src = inst->U.I.SrcReg[i];
    552 		if (src_reads_dst_mask(src, inst->U.I.DstReg))
    553 			return 0;
    554 
    555 		src.File = RC_FILE_PRESUB;
    556 		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
    557 			return 0;
    558 	}
    559 	return 1;
    560 }
    561 
    562 static int peephole_add_presub_add(
    563 	struct radeon_compiler * c,
    564 	struct rc_instruction * inst_add)
    565 {
    566 	unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
    567         unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
    568         unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
    569 
    570 	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
    571 		return 0;
    572 
    573 	/* src0 and src1 can't have absolute values */
    574 	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
    575 	        return 0;
    576 
    577 	/* presub_replace_add() assumes only one is negative */
    578 	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
    579 	        return 0;
    580 
    581         /* if src0 is negative, at least all bits of dstmask have to be set */
    582         if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
    583 	        return 0;
    584 
    585         /* if src1 is negative, at least all bits of dstmask have to be set */
    586         if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
    587 	        return 0;
    588 
    589 	if (!is_presub_candidate(c, inst_add))
    590 		return 0;
    591 
    592 	if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
    593 		rc_remove_instruction(inst_add);
    594 		return 1;
    595 	}
    596 	return 0;
    597 }
    598 
    599 static void presub_replace_inv(
    600 	struct rc_instruction * inst_add,
    601 	struct rc_instruction * inst_reader,
    602 	unsigned int src_index)
    603 {
    604 	/* We must be careful not to modify inst_add, since it
    605 	 * is possible it will remain part of the program.*/
    606 	inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
    607 	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
    608 	inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
    609 	inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
    610 						inst_reader->U.I.PreSub.SrcReg[0]);
    611 
    612 	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
    613 	inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
    614 }
    615 
    616 /**
    617  * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
    618  * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
    619  * of the add instruction must have the constatnt 1 swizzle.  This function
    620  * does not check const registers to see if their value is 1.0, so it should
    621  * be called after the constant_folding optimization.
    622  * @return
    623  * 	0 if the ADD instruction is still part of the program.
    624  * 	1 if the ADD instruction is no longer part of the program.
    625  */
    626 static int peephole_add_presub_inv(
    627 	struct radeon_compiler * c,
    628 	struct rc_instruction * inst_add)
    629 {
    630 	unsigned int i, swz;
    631 
    632 	if (!is_presub_candidate(c, inst_add))
    633 		return 0;
    634 
    635 	/* Check if src0 is 1. */
    636 	/* XXX It would be nice to use is_src_uniform_constant here, but that
    637 	 * function only works if the register's file is RC_FILE_NONE */
    638 	for(i = 0; i < 4; i++ ) {
    639 		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
    640 		if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
    641 						&& swz != RC_SWIZZLE_ONE) {
    642 			return 0;
    643 		}
    644 	}
    645 
    646 	/* Check src1. */
    647 	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
    648 						inst_add->U.I.DstReg.WriteMask
    649 		|| inst_add->U.I.SrcReg[1].Abs
    650 		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
    651 			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
    652 		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
    653 
    654 		return 0;
    655 	}
    656 
    657 	if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
    658 		rc_remove_instruction(inst_add);
    659 		return 1;
    660 	}
    661 	return 0;
    662 }
    663 
    664 struct peephole_mul_cb_data {
    665 	struct rc_dst_register * Writer;
    666 	unsigned int Clobbered;
    667 };
    668 
    669 static void omod_filter_reader_cb(
    670 	void * userdata,
    671 	struct rc_instruction * inst,
    672 	rc_register_file file,
    673 	unsigned int index,
    674 	unsigned int mask)
    675 {
    676 	struct peephole_mul_cb_data * d = userdata;
    677 	if (rc_src_reads_dst_mask(file, mask, index,
    678 		d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) {
    679 
    680 		d->Clobbered = 1;
    681 	}
    682 }
    683 
    684 static void omod_filter_writer_cb(
    685 	void * userdata,
    686 	struct rc_instruction * inst,
    687 	rc_register_file file,
    688 	unsigned int index,
    689 	unsigned int mask)
    690 {
    691 	struct peephole_mul_cb_data * d = userdata;
    692 	if (file == d->Writer->File && index == d->Writer->Index &&
    693 					(mask & d->Writer->WriteMask)) {
    694 		d->Clobbered = 1;
    695 	}
    696 }
    697 
    698 static int peephole_mul_omod(
    699 	struct radeon_compiler * c,
    700 	struct rc_instruction * inst_mul,
    701 	struct rc_list * var_list)
    702 {
    703 	unsigned int chan = 0, swz, i;
    704 	int const_index = -1;
    705 	int temp_index = -1;
    706 	float const_value;
    707 	rc_omod_op omod_op = RC_OMOD_DISABLE;
    708 	struct rc_list * writer_list;
    709 	struct rc_variable * var;
    710 	struct peephole_mul_cb_data cb_data;
    711 
    712 	for (i = 0; i < 2; i++) {
    713 		unsigned int j;
    714 		if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
    715 			&& inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
    716 			return 0;
    717 		}
    718 		if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
    719 			if (temp_index != -1) {
    720 				/* The instruction has two temp sources */
    721 				return 0;
    722 			} else {
    723 				temp_index = i;
    724 				continue;
    725 			}
    726 		}
    727 		/* If we get this far Src[i] must be a constant src */
    728 		if (inst_mul->U.I.SrcReg[i].Negate) {
    729 			return 0;
    730 		}
    731 		/* The constant src needs to read from the same swizzle */
    732 		swz = RC_SWIZZLE_UNUSED;
    733 		chan = 0;
    734 		for (j = 0; j < 4; j++) {
    735 			unsigned int j_swz =
    736 				GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
    737 			if (j_swz == RC_SWIZZLE_UNUSED) {
    738 				continue;
    739 			}
    740 			if (swz == RC_SWIZZLE_UNUSED) {
    741 				swz = j_swz;
    742 				chan = j;
    743 			} else if (j_swz != swz) {
    744 				return 0;
    745 			}
    746 		}
    747 
    748 		if (const_index != -1) {
    749 			/* The instruction has two constant sources */
    750 			return 0;
    751 		} else {
    752 			const_index = i;
    753 		}
    754 	}
    755 
    756 	if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
    757 				inst_mul->U.I.SrcReg[const_index].Index)) {
    758 		return 0;
    759 	}
    760 	const_value = rc_get_constant_value(c,
    761 			inst_mul->U.I.SrcReg[const_index].Index,
    762 			inst_mul->U.I.SrcReg[const_index].Swizzle,
    763 			inst_mul->U.I.SrcReg[const_index].Negate,
    764 			chan);
    765 
    766 	if (const_value == 2.0f) {
    767 		omod_op = RC_OMOD_MUL_2;
    768 	} else if (const_value == 4.0f) {
    769 		omod_op = RC_OMOD_MUL_4;
    770 	} else if (const_value == 8.0f) {
    771 		omod_op = RC_OMOD_MUL_8;
    772 	} else if (const_value == (1.0f / 2.0f)) {
    773 		omod_op = RC_OMOD_DIV_2;
    774 	} else if (const_value == (1.0f / 4.0f)) {
    775 		omod_op = RC_OMOD_DIV_4;
    776 	} else if (const_value == (1.0f / 8.0f)) {
    777 		omod_op = RC_OMOD_DIV_8;
    778 	} else {
    779 		return 0;
    780 	}
    781 
    782 	writer_list = rc_variable_list_get_writers_one_reader(var_list,
    783 		RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);
    784 
    785 	if (!writer_list) {
    786 		return 0;
    787 	}
    788 
    789 	cb_data.Clobbered = 0;
    790 	cb_data.Writer = &inst_mul->U.I.DstReg;
    791 	for (var = writer_list->Item; var; var = var->Friend) {
    792 		struct rc_instruction * inst;
    793 		const struct rc_opcode_info * info = rc_get_opcode_info(
    794 				var->Inst->U.I.Opcode);
    795 		if (info->HasTexture) {
    796 			return 0;
    797 		}
    798 		if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
    799 			return 0;
    800 		}
    801 		for (inst = inst_mul->Prev; inst != var->Inst;
    802 							inst = inst->Prev) {
    803 			rc_for_all_reads_mask(inst, omod_filter_reader_cb,
    804 								&cb_data);
    805 			rc_for_all_writes_mask(inst, omod_filter_writer_cb,
    806 								&cb_data);
    807 			if (cb_data.Clobbered) {
    808 				break;
    809 			}
    810 		}
    811 	}
    812 
    813 	if (cb_data.Clobbered) {
    814 		return 0;
    815 	}
    816 
    817 	/* Rewrite the instructions */
    818 	for (var = writer_list->Item; var; var = var->Friend) {
    819 		struct rc_variable * writer = writer_list->Item;
    820 		unsigned conversion_swizzle = rc_make_conversion_swizzle(
    821 					writer->Inst->U.I.DstReg.WriteMask,
    822 					inst_mul->U.I.DstReg.WriteMask);
    823 		writer->Inst->U.I.Omod = omod_op;
    824 		writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
    825 		writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;
    826 		rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle);
    827 		writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
    828 	}
    829 
    830 	rc_remove_instruction(inst_mul);
    831 
    832 	return 1;
    833 }
    834 
    835 /**
    836  * @return
    837  * 	0 if inst is still part of the program.
    838  * 	1 if inst is no longer part of the program.
    839  */
    840 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
    841 {
    842 	switch(inst->U.I.Opcode){
    843 	case RC_OPCODE_ADD:
    844 		if (c->has_presub) {
    845 			if(peephole_add_presub_inv(c, inst))
    846 				return 1;
    847 			if(peephole_add_presub_add(c, inst))
    848 				return 1;
    849 		}
    850 		break;
    851 	default:
    852 		break;
    853 	}
    854 	return 0;
    855 }
    856 
    857 void rc_optimize(struct radeon_compiler * c, void *user)
    858 {
    859 	struct rc_instruction * inst = c->Program.Instructions.Next;
    860 	struct rc_list * var_list;
    861 	while(inst != &c->Program.Instructions) {
    862 		struct rc_instruction * cur = inst;
    863 		inst = inst->Next;
    864 
    865 		constant_folding(c, cur);
    866 
    867 		if(peephole(c, cur))
    868 			continue;
    869 
    870 		if (cur->U.I.Opcode == RC_OPCODE_MOV) {
    871 			copy_propagate(c, cur);
    872 			/* cur may no longer be part of the program */
    873 		}
    874 	}
    875 
    876 	if (!c->has_omod) {
    877 		return;
    878 	}
    879 
    880 	inst = c->Program.Instructions.Next;
    881 	while(inst != &c->Program.Instructions) {
    882 		struct rc_instruction * cur = inst;
    883 		inst = inst->Next;
    884 		if (cur->U.I.Opcode == RC_OPCODE_MUL) {
    885 			var_list = rc_get_variables(c);
    886 			peephole_mul_omod(c, cur, var_list);
    887 		}
    888 	}
    889 }
    890