Home | History | Annotate | Download | only in compiler
      1 /*
      2  * Copyright (C) 2009 Nicolai Haehnle.
      3  * Copyright 2010 Tom Stellard <tstellar (at) gmail.com>
      4  *
      5  * All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining
      8  * a copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sublicense, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * The above copyright notice and this permission notice (including the
     16  * next paragraph) shall be included in all copies or substantial
     17  * portions of the Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     26  *
     27  */
     28 
     29 #include "radeon_dataflow.h"
     30 
     31 #include "radeon_compiler.h"
     32 #include "radeon_compiler_util.h"
     33 #include "radeon_list.h"
     34 #include "radeon_swizzle.h"
     35 #include "radeon_variable.h"
     36 
/* Context passed to src_clobbered_reads_cb().  It describes one register
 * write (filled in by is_src_clobbered_scan_write()) so the callback can
 * compare that write against each source operand it is shown. */
struct src_clobbered_reads_cb_data {
	rc_register_file File;		/* register file of the write */
	unsigned int Index;		/* register index of the write */
	unsigned int Mask;		/* channel mask of the write */
	struct rc_reader_data * ReaderData;	/* AbortOnRead is set here on a match */
};
     43 
/* Callback used by presub_helper() to rewrite one reader.  It is invoked as
 * fn(inst_add, reader_inst, src_index), where src_index is the index of the
 * reader's source operand that reads the ADD's result. */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
						struct rc_instruction *,
						unsigned int);
     47 
     48 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
     49 {
     50 	struct rc_src_register combine;
     51 	combine.File = inner.File;
     52 	combine.Index = inner.Index;
     53 	combine.RelAddr = inner.RelAddr;
     54 	if (outer.Abs) {
     55 		combine.Abs = 1;
     56 		combine.Negate = outer.Negate;
     57 	} else {
     58 		combine.Abs = inner.Abs;
     59 		combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
     60 		combine.Negate ^= outer.Negate;
     61 	}
     62 	combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
     63 	return combine;
     64 }
     65 
     66 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
     67 						struct rc_src_register * src)
     68 {
     69 	rc_register_file file = src->File;
     70 	struct rc_reader_data * reader_data = data;
     71 
     72 	if(!rc_inst_can_use_presub(inst,
     73 				reader_data->Writer->U.I.PreSub.Opcode,
     74 				rc_swizzle_to_writemask(src->Swizzle),
     75 				src,
     76 				&reader_data->Writer->U.I.PreSub.SrcReg[0],
     77 				&reader_data->Writer->U.I.PreSub.SrcReg[1])) {
     78 		reader_data->Abort = 1;
     79 		return;
     80 	}
     81 
     82 	/* XXX This could probably be handled better. */
     83 	if (file == RC_FILE_ADDRESS) {
     84 		reader_data->Abort = 1;
     85 		return;
     86 	}
     87 
     88 	/* These instructions cannot read from the constants file.
     89 	 * see radeonTransformTEX()
     90 	 */
     91 	if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
     92 			reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
     93 				(inst->U.I.Opcode == RC_OPCODE_TEX ||
     94 				inst->U.I.Opcode == RC_OPCODE_TXB ||
     95 				inst->U.I.Opcode == RC_OPCODE_TXP ||
     96 				inst->U.I.Opcode == RC_OPCODE_TXD ||
     97 				inst->U.I.Opcode == RC_OPCODE_TXL ||
     98 				inst->U.I.Opcode == RC_OPCODE_KIL)){
     99 		reader_data->Abort = 1;
    100 		return;
    101 	}
    102 }
    103 
    104 static void src_clobbered_reads_cb(
    105 	void * data,
    106 	struct rc_instruction * inst,
    107 	struct rc_src_register * src)
    108 {
    109 	struct src_clobbered_reads_cb_data * sc_data = data;
    110 
    111 	if (src->File == sc_data->File
    112 	    && src->Index == sc_data->Index
    113 	    && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
    114 
    115 		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
    116 	}
    117 
    118 	if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
    119 		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
    120 	}
    121 }
    122 
    123 static void is_src_clobbered_scan_write(
    124 	void * data,
    125 	struct rc_instruction * inst,
    126 	rc_register_file file,
    127 	unsigned int index,
    128 	unsigned int mask)
    129 {
    130 	struct src_clobbered_reads_cb_data sc_data;
    131 	struct rc_reader_data * reader_data = data;
    132 	sc_data.File = file;
    133 	sc_data.Index = index;
    134 	sc_data.Mask = mask;
    135 	sc_data.ReaderData = reader_data;
    136 	rc_for_all_reads_src(reader_data->Writer,
    137 					src_clobbered_reads_cb, &sc_data);
    138 }
    139 
    140 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
    141 {
    142 	struct rc_reader_data reader_data;
    143 	unsigned int i;
    144 
    145 	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
    146 	    inst_mov->U.I.WriteALUResult)
    147 		return;
    148 
    149 	/* Get a list of all the readers of this MOV instruction. */
    150 	reader_data.ExitOnAbort = 1;
    151 	rc_get_readers(c, inst_mov, &reader_data,
    152 		       copy_propagate_scan_read, NULL,
    153 		       is_src_clobbered_scan_write);
    154 
    155 	if (reader_data.Abort || reader_data.ReaderCount == 0)
    156 		return;
    157 
    158 	/* We can propagate SaturateMode if all the readers are MOV instructions
    159 	 * without a presubtract operation, source negation and absolute.
    160 	 * In that case, we just move SaturateMode to all readers. */
    161         if (inst_mov->U.I.SaturateMode) {
    162 		for (i = 0; i < reader_data.ReaderCount; i++) {
    163 			struct rc_instruction * inst = reader_data.Readers[i].Inst;
    164 
    165 			if (inst->U.I.Opcode != RC_OPCODE_MOV ||
    166 			    inst->U.I.SrcReg[0].File == RC_FILE_PRESUB ||
    167 			    inst->U.I.SrcReg[0].Abs ||
    168 			    inst->U.I.SrcReg[0].Negate) {
    169 				return;
    170 			}
    171 		}
    172 	}
    173 
    174 	/* Propagate the MOV instruction. */
    175 	for (i = 0; i < reader_data.ReaderCount; i++) {
    176 		struct rc_instruction * inst = reader_data.Readers[i].Inst;
    177 		*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
    178 
    179 		if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
    180 			inst->U.I.PreSub = inst_mov->U.I.PreSub;
    181 		if (!inst->U.I.SaturateMode)
    182 			inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode;
    183 	}
    184 
    185 	/* Finally, remove the original MOV instruction */
    186 	rc_remove_instruction(inst_mov);
    187 }
    188 
    189 /**
    190  * Check if a source register is actually always the same
    191  * swizzle constant.
    192  */
    193 static int is_src_uniform_constant(struct rc_src_register src,
    194 		rc_swizzle * pswz, unsigned int * pnegate)
    195 {
    196 	int have_used = 0;
    197 
    198 	if (src.File != RC_FILE_NONE) {
    199 		*pswz = 0;
    200 		return 0;
    201 	}
    202 
    203 	for(unsigned int chan = 0; chan < 4; ++chan) {
    204 		unsigned int swz = GET_SWZ(src.Swizzle, chan);
    205 		if (swz < 4) {
    206 			*pswz = 0;
    207 			return 0;
    208 		}
    209 		if (swz == RC_SWIZZLE_UNUSED)
    210 			continue;
    211 
    212 		if (!have_used) {
    213 			*pswz = swz;
    214 			*pnegate = GET_BIT(src.Negate, chan);
    215 			have_used = 1;
    216 		} else {
    217 			if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
    218 				*pswz = 0;
    219 				return 0;
    220 			}
    221 		}
    222 	}
    223 
    224 	return 1;
    225 }
    226 
    227 static void constant_folding_mad(struct rc_instruction * inst)
    228 {
    229 	rc_swizzle swz = 0;
    230 	unsigned int negate= 0;
    231 
    232 	if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
    233 		if (swz == RC_SWIZZLE_ZERO) {
    234 			inst->U.I.Opcode = RC_OPCODE_MUL;
    235 			return;
    236 		}
    237 	}
    238 
    239 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
    240 		if (swz == RC_SWIZZLE_ONE) {
    241 			inst->U.I.Opcode = RC_OPCODE_ADD;
    242 			if (negate)
    243 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
    244 			inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
    245 			return;
    246 		} else if (swz == RC_SWIZZLE_ZERO) {
    247 			inst->U.I.Opcode = RC_OPCODE_MOV;
    248 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
    249 			return;
    250 		}
    251 	}
    252 
    253 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
    254 		if (swz == RC_SWIZZLE_ONE) {
    255 			inst->U.I.Opcode = RC_OPCODE_ADD;
    256 			if (negate)
    257 				inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
    258 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
    259 			return;
    260 		} else if (swz == RC_SWIZZLE_ZERO) {
    261 			inst->U.I.Opcode = RC_OPCODE_MOV;
    262 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
    263 			return;
    264 		}
    265 	}
    266 }
    267 
    268 static void constant_folding_mul(struct rc_instruction * inst)
    269 {
    270 	rc_swizzle swz = 0;
    271 	unsigned int negate = 0;
    272 
    273 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
    274 		if (swz == RC_SWIZZLE_ONE) {
    275 			inst->U.I.Opcode = RC_OPCODE_MOV;
    276 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
    277 			if (negate)
    278 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
    279 			return;
    280 		} else if (swz == RC_SWIZZLE_ZERO) {
    281 			inst->U.I.Opcode = RC_OPCODE_MOV;
    282 			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
    283 			return;
    284 		}
    285 	}
    286 
    287 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
    288 		if (swz == RC_SWIZZLE_ONE) {
    289 			inst->U.I.Opcode = RC_OPCODE_MOV;
    290 			if (negate)
    291 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
    292 			return;
    293 		} else if (swz == RC_SWIZZLE_ZERO) {
    294 			inst->U.I.Opcode = RC_OPCODE_MOV;
    295 			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
    296 			return;
    297 		}
    298 	}
    299 }
    300 
    301 static void constant_folding_add(struct rc_instruction * inst)
    302 {
    303 	rc_swizzle swz = 0;
    304 	unsigned int negate = 0;
    305 
    306 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
    307 		if (swz == RC_SWIZZLE_ZERO) {
    308 			inst->U.I.Opcode = RC_OPCODE_MOV;
    309 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
    310 			return;
    311 		}
    312 	}
    313 
    314 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
    315 		if (swz == RC_SWIZZLE_ZERO) {
    316 			inst->U.I.Opcode = RC_OPCODE_MOV;
    317 			return;
    318 		}
    319 	}
    320 }
    321 
    322 /**
    323  * Replace 0.0, 1.0 and 0.5 immediate constants by their
    324  * respective swizzles. Simplify instructions like ADD dst, src, 0;
    325  */
    326 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
    327 {
    328 	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
    329 	unsigned int i;
    330 
    331 	/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
    332 	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
    333 		struct rc_constant * constant;
    334 		struct rc_src_register newsrc;
    335 		int have_real_reference;
    336 		unsigned int chan;
    337 
    338 		/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
    339 		for (chan = 0; chan < 4; ++chan)
    340 			if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
    341 				break;
    342 		if (chan == 4) {
    343 			inst->U.I.SrcReg[src].File = RC_FILE_NONE;
    344 			continue;
    345 		}
    346 
    347 		/* Convert immediates to swizzles. */
    348 		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
    349 		    inst->U.I.SrcReg[src].RelAddr ||
    350 		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
    351 			continue;
    352 
    353 		constant =
    354 			&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
    355 
    356 		if (constant->Type != RC_CONSTANT_IMMEDIATE)
    357 			continue;
    358 
    359 		newsrc = inst->U.I.SrcReg[src];
    360 		have_real_reference = 0;
    361 		for (chan = 0; chan < 4; ++chan) {
    362 			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
    363 			unsigned int newswz;
    364 			float imm;
    365 			float baseimm;
    366 
    367 			if (swz >= 4)
    368 				continue;
    369 
    370 			imm = constant->u.Immediate[swz];
    371 			baseimm = imm;
    372 			if (imm < 0.0)
    373 				baseimm = -baseimm;
    374 
    375 			if (baseimm == 0.0) {
    376 				newswz = RC_SWIZZLE_ZERO;
    377 			} else if (baseimm == 1.0) {
    378 				newswz = RC_SWIZZLE_ONE;
    379 			} else if (baseimm == 0.5 && c->has_half_swizzles) {
    380 				newswz = RC_SWIZZLE_HALF;
    381 			} else {
    382 				have_real_reference = 1;
    383 				continue;
    384 			}
    385 
    386 			SET_SWZ(newsrc.Swizzle, chan, newswz);
    387 			if (imm < 0.0 && !newsrc.Abs)
    388 				newsrc.Negate ^= 1 << chan;
    389 		}
    390 
    391 		if (!have_real_reference) {
    392 			newsrc.File = RC_FILE_NONE;
    393 			newsrc.Index = 0;
    394 		}
    395 
    396 		/* don't make the swizzle worse */
    397 		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
    398 		    c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
    399 			continue;
    400 
    401 		inst->U.I.SrcReg[src] = newsrc;
    402 	}
    403 
    404 	/* Simplify instructions based on constants */
    405 	if (inst->U.I.Opcode == RC_OPCODE_MAD)
    406 		constant_folding_mad(inst);
    407 
    408 	/* note: MAD can simplify to MUL or ADD */
    409 	if (inst->U.I.Opcode == RC_OPCODE_MUL)
    410 		constant_folding_mul(inst);
    411 	else if (inst->U.I.Opcode == RC_OPCODE_ADD)
    412 		constant_folding_add(inst);
    413 
    414 	/* In case this instruction has been converted, make sure all of the
    415 	 * registers that are no longer used are empty. */
    416 	opcode = rc_get_opcode_info(inst->U.I.Opcode);
    417 	for(i = opcode->NumSrcRegs; i < 3; i++) {
    418 		memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
    419 	}
    420 }
    421 
    422 /**
    423  * If src and dst use the same register, this function returns a writemask that
    424  * indicates wich components are read by src.  Otherwise zero is returned.
    425  */
    426 static unsigned int src_reads_dst_mask(struct rc_src_register src,
    427 						struct rc_dst_register dst)
    428 {
    429 	if (dst.File != src.File || dst.Index != src.Index) {
    430 		return 0;
    431 	}
    432 	return rc_swizzle_to_writemask(src.Swizzle);
    433 }
    434 
    435 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
    436  * in any of its channels.  Return 0 otherwise. */
    437 static int src_has_const_swz(struct rc_src_register src) {
    438 	int chan;
    439 	for(chan = 0; chan < 4; chan++) {
    440 		unsigned int swz = GET_SWZ(src.Swizzle, chan);
    441 		if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
    442 						|| swz == RC_SWIZZLE_ONE) {
    443 			return 1;
    444 		}
    445 	}
    446 	return 0;
    447 }
    448 
    449 static void presub_scan_read(
    450 	void * data,
    451 	struct rc_instruction * inst,
    452 	struct rc_src_register * src)
    453 {
    454 	struct rc_reader_data * reader_data = data;
    455 	rc_presubtract_op * presub_opcode = reader_data->CbData;
    456 
    457 	if (!rc_inst_can_use_presub(inst, *presub_opcode,
    458 			reader_data->Writer->U.I.DstReg.WriteMask,
    459 			src,
    460 			&reader_data->Writer->U.I.SrcReg[0],
    461 			&reader_data->Writer->U.I.SrcReg[1])) {
    462 		reader_data->Abort = 1;
    463 		return;
    464 	}
    465 }
    466 
    467 static int presub_helper(
    468 	struct radeon_compiler * c,
    469 	struct rc_instruction * inst_add,
    470 	rc_presubtract_op presub_opcode,
    471 	rc_presub_replace_fn presub_replace)
    472 {
    473 	struct rc_reader_data reader_data;
    474 	unsigned int i;
    475 	rc_presubtract_op cb_op = presub_opcode;
    476 
    477 	reader_data.CbData = &cb_op;
    478 	reader_data.ExitOnAbort = 1;
    479 	rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
    480 						is_src_clobbered_scan_write);
    481 
    482 	if (reader_data.Abort || reader_data.ReaderCount == 0)
    483 		return 0;
    484 
    485 	for(i = 0; i < reader_data.ReaderCount; i++) {
    486 		unsigned int src_index;
    487 		struct rc_reader reader = reader_data.Readers[i];
    488 		const struct rc_opcode_info * info =
    489 				rc_get_opcode_info(reader.Inst->U.I.Opcode);
    490 
    491 		for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
    492 			if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
    493 				presub_replace(inst_add, reader.Inst, src_index);
    494 		}
    495 	}
    496 	return 1;
    497 }
    498 
    499 /* This function assumes that inst_add->U.I.SrcReg[0] and
    500  * inst_add->U.I.SrcReg[1] aren't both negative. */
    501 static void presub_replace_add(
    502 	struct rc_instruction * inst_add,
    503 	struct rc_instruction * inst_reader,
    504 	unsigned int src_index)
    505 {
    506 	rc_presubtract_op presub_opcode;
    507 	if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
    508 		presub_opcode = RC_PRESUB_SUB;
    509 	else
    510 		presub_opcode = RC_PRESUB_ADD;
    511 
    512 	if (inst_add->U.I.SrcReg[1].Negate) {
    513 		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
    514 		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
    515 	} else {
    516 		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
    517 		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
    518 	}
    519 	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
    520 	inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
    521 	inst_reader->U.I.PreSub.Opcode = presub_opcode;
    522 	inst_reader->U.I.SrcReg[src_index] =
    523 			chain_srcregs(inst_reader->U.I.SrcReg[src_index],
    524 					inst_reader->U.I.PreSub.SrcReg[0]);
    525 	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
    526 	inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
    527 }
    528 
    529 static int is_presub_candidate(
    530 	struct radeon_compiler * c,
    531 	struct rc_instruction * inst)
    532 {
    533 	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
    534 	unsigned int i;
    535 	unsigned int is_constant[2] = {0, 0};
    536 
    537 	assert(inst->U.I.Opcode == RC_OPCODE_ADD);
    538 
    539 	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
    540 			|| inst->U.I.SaturateMode
    541 			|| inst->U.I.WriteALUResult
    542 			|| inst->U.I.Omod) {
    543 		return 0;
    544 	}
    545 
    546 	/* If both sources use a constant swizzle, then we can't convert it to
    547 	 * a presubtract operation.  In fact for the ADD and SUB presubtract
    548 	 * operations neither source can contain a constant swizzle.  This
    549 	 * specific case is checked in peephole_add_presub_add() when
    550 	 * we make sure the swizzles for both sources are equal, so we
    551 	 * don't need to worry about it here. */
    552 	for (i = 0; i < 2; i++) {
    553 		int chan;
    554 		for (chan = 0; chan < 4; chan++) {
    555 			rc_swizzle swz =
    556 				get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
    557 			if (swz == RC_SWIZZLE_ONE
    558 					|| swz == RC_SWIZZLE_ZERO
    559 					|| swz == RC_SWIZZLE_HALF) {
    560 				is_constant[i] = 1;
    561 			}
    562 		}
    563 	}
    564 	if (is_constant[0] && is_constant[1])
    565 		return 0;
    566 
    567 	for(i = 0; i < info->NumSrcRegs; i++) {
    568 		struct rc_src_register src = inst->U.I.SrcReg[i];
    569 		if (src_reads_dst_mask(src, inst->U.I.DstReg))
    570 			return 0;
    571 
    572 		src.File = RC_FILE_PRESUB;
    573 		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
    574 			return 0;
    575 	}
    576 	return 1;
    577 }
    578 
    579 static int peephole_add_presub_add(
    580 	struct radeon_compiler * c,
    581 	struct rc_instruction * inst_add)
    582 {
    583 	unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
    584         unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
    585         unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
    586 
    587 	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
    588 		return 0;
    589 
    590 	/* src0 and src1 can't have absolute values */
    591 	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
    592 	        return 0;
    593 
    594 	/* presub_replace_add() assumes only one is negative */
    595 	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
    596 	        return 0;
    597 
    598         /* if src0 is negative, at least all bits of dstmask have to be set */
    599         if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
    600 	        return 0;
    601 
    602         /* if src1 is negative, at least all bits of dstmask have to be set */
    603         if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
    604 	        return 0;
    605 
    606 	if (!is_presub_candidate(c, inst_add))
    607 		return 0;
    608 
    609 	if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
    610 		rc_remove_instruction(inst_add);
    611 		return 1;
    612 	}
    613 	return 0;
    614 }
    615 
    616 static void presub_replace_inv(
    617 	struct rc_instruction * inst_add,
    618 	struct rc_instruction * inst_reader,
    619 	unsigned int src_index)
    620 {
    621 	/* We must be careful not to modify inst_add, since it
    622 	 * is possible it will remain part of the program.*/
    623 	inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
    624 	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
    625 	inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
    626 	inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
    627 						inst_reader->U.I.PreSub.SrcReg[0]);
    628 
    629 	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
    630 	inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
    631 }
    632 
    633 /**
    634  * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
    635  * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
    636  * of the add instruction must have the constatnt 1 swizzle.  This function
    637  * does not check const registers to see if their value is 1.0, so it should
    638  * be called after the constant_folding optimization.
    639  * @return
    640  * 	0 if the ADD instruction is still part of the program.
    641  * 	1 if the ADD instruction is no longer part of the program.
    642  */
    643 static int peephole_add_presub_inv(
    644 	struct radeon_compiler * c,
    645 	struct rc_instruction * inst_add)
    646 {
    647 	unsigned int i, swz;
    648 
    649 	if (!is_presub_candidate(c, inst_add))
    650 		return 0;
    651 
    652 	/* Check if src0 is 1. */
    653 	/* XXX It would be nice to use is_src_uniform_constant here, but that
    654 	 * function only works if the register's file is RC_FILE_NONE */
    655 	for(i = 0; i < 4; i++ ) {
    656 		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
    657 		if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
    658 						&& swz != RC_SWIZZLE_ONE) {
    659 			return 0;
    660 		}
    661 	}
    662 
    663 	/* Check src1. */
    664 	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
    665 						inst_add->U.I.DstReg.WriteMask
    666 		|| inst_add->U.I.SrcReg[1].Abs
    667 		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
    668 			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
    669 		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
    670 
    671 		return 0;
    672 	}
    673 
    674 	if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
    675 		rc_remove_instruction(inst_add);
    676 		return 1;
    677 	}
    678 	return 0;
    679 }
    680 
/* State shared with omod_filter_reader_cb() and omod_filter_writer_cb()
 * while peephole_mul_omod() scans instructions. */
struct peephole_mul_cb_data {
	struct rc_dst_register * Writer;	/* destination register being checked */
	unsigned int Clobbered;		/* set to 1 by the callbacks on a conflict */
};
    685 
    686 static void omod_filter_reader_cb(
    687 	void * userdata,
    688 	struct rc_instruction * inst,
    689 	rc_register_file file,
    690 	unsigned int index,
    691 	unsigned int mask)
    692 {
    693 	struct peephole_mul_cb_data * d = userdata;
    694 	if (rc_src_reads_dst_mask(file, mask, index,
    695 		d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) {
    696 
    697 		d->Clobbered = 1;
    698 	}
    699 }
    700 
    701 static void omod_filter_writer_cb(
    702 	void * userdata,
    703 	struct rc_instruction * inst,
    704 	rc_register_file file,
    705 	unsigned int index,
    706 	unsigned int mask)
    707 {
    708 	struct peephole_mul_cb_data * d = userdata;
    709 	if (file == d->Writer->File && index == d->Writer->Index &&
    710 					(mask & d->Writer->WriteMask)) {
    711 		d->Clobbered = 1;
    712 	}
    713 }
    714 
    715 static int peephole_mul_omod(
    716 	struct radeon_compiler * c,
    717 	struct rc_instruction * inst_mul,
    718 	struct rc_list * var_list)
    719 {
    720 	unsigned int chan = 0, swz, i;
    721 	int const_index = -1;
    722 	int temp_index = -1;
    723 	float const_value;
    724 	rc_omod_op omod_op = RC_OMOD_DISABLE;
    725 	struct rc_list * writer_list;
    726 	struct rc_variable * var;
    727 	struct peephole_mul_cb_data cb_data;
    728 	unsigned writemask_sum;
    729 
    730 	for (i = 0; i < 2; i++) {
    731 		unsigned int j;
    732 		if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
    733 			&& inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
    734 			return 0;
    735 		}
    736 		if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
    737 			if (temp_index != -1) {
    738 				/* The instruction has two temp sources */
    739 				return 0;
    740 			} else {
    741 				temp_index = i;
    742 				continue;
    743 			}
    744 		}
    745 		/* If we get this far Src[i] must be a constant src */
    746 		if (inst_mul->U.I.SrcReg[i].Negate) {
    747 			return 0;
    748 		}
    749 		/* The constant src needs to read from the same swizzle */
    750 		swz = RC_SWIZZLE_UNUSED;
    751 		chan = 0;
    752 		for (j = 0; j < 4; j++) {
    753 			unsigned int j_swz =
    754 				GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
    755 			if (j_swz == RC_SWIZZLE_UNUSED) {
    756 				continue;
    757 			}
    758 			if (swz == RC_SWIZZLE_UNUSED) {
    759 				swz = j_swz;
    760 				chan = j;
    761 			} else if (j_swz != swz) {
    762 				return 0;
    763 			}
    764 		}
    765 
    766 		if (const_index != -1) {
    767 			/* The instruction has two constant sources */
    768 			return 0;
    769 		} else {
    770 			const_index = i;
    771 		}
    772 	}
    773 
    774 	if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
    775 				inst_mul->U.I.SrcReg[const_index].Index)) {
    776 		return 0;
    777 	}
    778 	const_value = rc_get_constant_value(c,
    779 			inst_mul->U.I.SrcReg[const_index].Index,
    780 			inst_mul->U.I.SrcReg[const_index].Swizzle,
    781 			inst_mul->U.I.SrcReg[const_index].Negate,
    782 			chan);
    783 
    784 	if (const_value == 2.0f) {
    785 		omod_op = RC_OMOD_MUL_2;
    786 	} else if (const_value == 4.0f) {
    787 		omod_op = RC_OMOD_MUL_4;
    788 	} else if (const_value == 8.0f) {
    789 		omod_op = RC_OMOD_MUL_8;
    790 	} else if (const_value == (1.0f / 2.0f)) {
    791 		omod_op = RC_OMOD_DIV_2;
    792 	} else if (const_value == (1.0f / 4.0f)) {
    793 		omod_op = RC_OMOD_DIV_4;
    794 	} else if (const_value == (1.0f / 8.0f)) {
    795 		omod_op = RC_OMOD_DIV_8;
    796 	} else {
    797 		return 0;
    798 	}
    799 
    800 	writer_list = rc_variable_list_get_writers_one_reader(var_list,
    801 		RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);
    802 
    803 	if (!writer_list) {
    804 		return 0;
    805 	}
    806 
    807 	cb_data.Clobbered = 0;
    808 	cb_data.Writer = &inst_mul->U.I.DstReg;
    809 	for (var = writer_list->Item; var; var = var->Friend) {
    810 		struct rc_instruction * inst;
    811 		const struct rc_opcode_info * info = rc_get_opcode_info(
    812 				var->Inst->U.I.Opcode);
    813 		if (info->HasTexture) {
    814 			return 0;
    815 		}
    816 		if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
    817 			return 0;
    818 		}
    819 		for (inst = inst_mul->Prev; inst != var->Inst;
    820 							inst = inst->Prev) {
    821 			rc_for_all_reads_mask(inst, omod_filter_reader_cb,
    822 								&cb_data);
    823 			rc_for_all_writes_mask(inst, omod_filter_writer_cb,
    824 								&cb_data);
    825 			if (cb_data.Clobbered) {
    826 				break;
    827 			}
    828 		}
    829 	}
    830 
    831 	if (cb_data.Clobbered) {
    832 		return 0;
    833 	}
    834 
    835 	/* Rewrite the instructions */
    836 	writemask_sum = rc_variable_writemask_sum(writer_list->Item);
    837 	for (var = writer_list->Item; var; var = var->Friend) {
    838 		struct rc_variable * writer = var;
    839 		unsigned conversion_swizzle = rc_make_conversion_swizzle(
    840 					writemask_sum,
    841 					inst_mul->U.I.DstReg.WriteMask);
    842 		writer->Inst->U.I.Omod = omod_op;
    843 		writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
    844 		writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;
    845 		rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle);
    846 		writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
    847 	}
    848 
    849 	rc_remove_instruction(inst_mul);
    850 
    851 	return 1;
    852 }
    853 
    854 /**
    855  * @return
    856  * 	0 if inst is still part of the program.
    857  * 	1 if inst is no longer part of the program.
    858  */
    859 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
    860 {
    861 	switch(inst->U.I.Opcode){
    862 	case RC_OPCODE_ADD:
    863 		if (c->has_presub) {
    864 			if(peephole_add_presub_inv(c, inst))
    865 				return 1;
    866 			if(peephole_add_presub_add(c, inst))
    867 				return 1;
    868 		}
    869 		break;
    870 	default:
    871 		break;
    872 	}
    873 	return 0;
    874 }
    875 
    876 void rc_optimize(struct radeon_compiler * c, void *user)
    877 {
    878 	struct rc_instruction * inst = c->Program.Instructions.Next;
    879 	struct rc_list * var_list;
    880 	while(inst != &c->Program.Instructions) {
    881 		struct rc_instruction * cur = inst;
    882 		inst = inst->Next;
    883 
    884 		constant_folding(c, cur);
    885 
    886 		if(peephole(c, cur))
    887 			continue;
    888 
    889 		if (cur->U.I.Opcode == RC_OPCODE_MOV) {
    890 			copy_propagate(c, cur);
    891 			/* cur may no longer be part of the program */
    892 		}
    893 	}
    894 
    895 	if (!c->has_omod) {
    896 		return;
    897 	}
    898 
    899 	inst = c->Program.Instructions.Next;
    900 	while(inst != &c->Program.Instructions) {
    901 		struct rc_instruction * cur = inst;
    902 		inst = inst->Next;
    903 		if (cur->U.I.Opcode == RC_OPCODE_MUL) {
    904 			var_list = rc_get_variables(c);
    905 			peephole_mul_omod(c, cur, var_list);
    906 		}
    907 	}
    908 }
    909