Home | History | Annotate | Download | only in compiler
      1 /*
      2  * Copyright (C) 2009 Nicolai Haehnle.
      3  * Copyright 2012 Advanced Micro Devices, Inc.
      4  *
      5  * All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining
      8  * a copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sublicense, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * The above copyright notice and this permission notice (including the
     16  * next paragraph) shall be included in all copies or substantial
     17  * portions of the Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     26  *
     27  * Authors:
     28  * Nicolai Haehnle
     29  * Tom Stellard <thomas.stellard (at) amd.com>
     30  */
     31 
     32 #include "radeon_dataflow.h"
     33 
     34 #include "radeon_code.h"
     35 #include "radeon_compiler.h"
     36 #include "radeon_compiler_util.h"
     37 #include "radeon_swizzle.h"
     38 
     39 
     40 static void rewrite_source(struct radeon_compiler * c,
     41 		struct rc_instruction * inst, unsigned src)
     42 {
     43 	struct rc_swizzle_split split;
     44 	unsigned int tempreg = rc_find_free_temporary(c);
     45 	unsigned int usemask;
     46 
     47 	usemask = 0;
     48 	for(unsigned int chan = 0; chan < 4; ++chan) {
     49 		if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
     50 			usemask |= 1 << chan;
     51 	}
     52 
     53 	c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split);
     54 
     55 	for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
     56 		struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
     57 		unsigned int phase_refmask;
     58 		unsigned int masked_negate;
     59 
     60 		mov->U.I.Opcode = RC_OPCODE_MOV;
     61 		mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
     62 		mov->U.I.DstReg.Index = tempreg;
     63 		mov->U.I.DstReg.WriteMask = split.Phase[phase];
     64 		mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
     65 		mov->U.I.PreSub = inst->U.I.PreSub;
     66 
     67 		phase_refmask = 0;
     68 		for(unsigned int chan = 0; chan < 4; ++chan) {
     69 			if (!GET_BIT(split.Phase[phase], chan))
     70 				SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
     71 			else
     72 				phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan);
     73 		}
     74 
     75 		phase_refmask &= RC_MASK_XYZW;
     76 
     77 		masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
     78 		if (masked_negate == 0)
     79 			mov->U.I.SrcReg[0].Negate = 0;
     80 		else if (masked_negate == split.Phase[phase])
     81 			mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
     82 
     83 	}
     84 
     85 	inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
     86 	inst->U.I.SrcReg[src].Index = tempreg;
     87 	inst->U.I.SrcReg[src].Swizzle = 0;
     88 	inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
     89 	inst->U.I.SrcReg[src].Abs = 0;
     90 	for(unsigned int chan = 0; chan < 4; ++chan) {
     91 		SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
     92 				GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
     93 	}
     94 }
     95 
     96 /**
     97  * This function will attempt to rewrite non-native swizzles that read from
     98  * immediate registers by rearranging the immediates to allow the
     99  * instruction to use native swizzles.
    100  */
    101 static unsigned try_rewrite_constant(struct radeon_compiler *c,
    102 					struct rc_src_register *reg)
    103 {
    104 	unsigned new_swizzle, chan, swz0, swz1, swz2, swz3, found_swizzle, swz;
    105 	unsigned all_inline = 0;
    106 	float imms[4] = {0.0f, 0.0f, 0.0f, 0.0f};
    107 
    108 	if (!rc_src_reg_is_immediate(c, reg->File, reg->Index)) {
    109 		/* The register does not contain immediates, but if all
    110 		 * the swizzles are inline constants, we can still rewrite
    111 		 * it. */
    112 
    113 		new_swizzle = RC_SWIZZLE_XYZW;
    114 		for (chan = 0 ; chan < 4; chan++) {
    115 			unsigned swz = GET_SWZ(reg->Swizzle, chan);
    116 			if (swz <= RC_SWIZZLE_W) {
    117 				return 0;
    118 			}
    119 			if (swz == RC_SWIZZLE_UNUSED) {
    120 				SET_SWZ(new_swizzle, chan, RC_SWIZZLE_UNUSED);
    121 			}
    122 		}
    123 		all_inline = 1;
    124 	} else {
    125 		new_swizzle = reg->Swizzle;
    126 	}
    127 
    128 	swz = RC_SWIZZLE_UNUSED;
    129 	found_swizzle = 1;
    130 	/* Check if all channels have the same swizzle.  If they do we can skip
    131 	 * the search for a native swizzle.  We only need to check the first
    132 	 * three channels, because any swizzle is legal in the fourth channel.
    133 	 */
    134 	for (chan = 0; chan < 3; chan++) {
    135 		unsigned chan_swz = GET_SWZ(reg->Swizzle, chan);
    136 		if (chan_swz == RC_SWIZZLE_UNUSED) {
    137 			continue;
    138 		}
    139 		if (swz == RC_SWIZZLE_UNUSED) {
    140 			swz = chan_swz;
    141 		} else if (swz != chan_swz) {
    142 			found_swizzle = 0;
    143 			break;
    144 		}
    145 	}
    146 
    147 	/* Find a legal swizzle */
    148 
    149 	/* This loop attempts to find a native swizzle where all the
    150 	 * channels are different. */
    151 	while (!found_swizzle && !all_inline) {
    152 		swz0 = GET_SWZ(new_swizzle, 0);
    153 		swz1 = GET_SWZ(new_swizzle, 1);
    154 		swz2 = GET_SWZ(new_swizzle, 2);
    155 
    156 		/* Swizzle .W. is never legal. */
    157 		if (swz1 == RC_SWIZZLE_W ||
    158 			swz1 == RC_SWIZZLE_UNUSED ||
    159 			swz1 == RC_SWIZZLE_ZERO ||
    160 			swz1 == RC_SWIZZLE_HALF ||
    161 			swz1 == RC_SWIZZLE_ONE) {
    162 			/* We chose Z, because there are two non-repeating
    163 			 * swizzle combinations of the form .Z. There are
    164 			 * only one combination each for .X. and .Y. */
    165 			SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
    166 			continue;
    167 		}
    168 
    169 		if (swz2 == RC_SWIZZLE_UNUSED) {
    170 			/* We choose Y, because there are two non-repeating
    171 			 * swizzle combinations of the form ..Y */
    172 			SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
    173 			continue;
    174 		}
    175 
    176 		switch (swz0) {
    177 		/* X.. */
    178 		case RC_SWIZZLE_X:
    179 			/* Legal swizzles that start with X: XYZ, XXX */
    180 			switch (swz1) {
    181 			/* XX. */
    182 			case RC_SWIZZLE_X:
    183 				/*  The new swizzle will be:
    184 				 *  ZXY (XX. => ZX. => ZXY) */
    185 				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
    186 				break;
    187 			/* XY. */
    188 			case RC_SWIZZLE_Y:
    189 				/* The new swizzle is XYZ */
    190 				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Z);
    191 				found_swizzle = 1;
    192 				break;
    193 			/* XZ. */
    194 			case RC_SWIZZLE_Z:
    195 				/* XZZ */
    196 				if (swz2 == RC_SWIZZLE_Z) {
    197 					/* The new swizzle is XYZ */
    198 					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Y);
    199 					found_swizzle = 1;
    200 				} else { /* XZ[^Z] */
    201 					/* The new swizzle will be:
    202 					 * YZX (XZ. => YZ. => YZX) */
    203 					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Y);
    204 				}
    205 				break;
    206 			/* XW. Should have already been handled. */
    207 			case RC_SWIZZLE_W:
    208 				assert(0);
    209 				break;
    210 			}
    211 			break;
    212 		/* Y.. */
    213 		case RC_SWIZZLE_Y:
    214 			/* Legal swizzles that start with Y: YYY, YZX */
    215 			switch (swz1) {
    216 			/* YY. */
    217 			case RC_SWIZZLE_Y:
    218 				/* The new swizzle will be:
    219 				 * XYZ (YY. => XY. => XYZ) */
    220 				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
    221 				break;
    222 			/* YZ. */
    223 			case RC_SWIZZLE_Z:
    224 				/* The new swizzle is YZX */
    225 				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_X);
    226 				found_swizzle = 1;
    227 				break;
    228 			/* YX. */
    229 			case RC_SWIZZLE_X:
    230 				/* YXX */
    231 				if (swz2 == RC_SWIZZLE_X) {
    232 					/*The new swizzle is YZX */
    233 					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
    234 					found_swizzle = 1;
    235 				} else { /* YX[^X] */
    236 					/* The new swizzle will be:
    237 					 * ZXY (YX. => ZX. -> ZXY) */
    238 					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
    239 				}
    240 				break;
    241 			/* YW. Should have already been handled. */
    242 			case RC_SWIZZLE_W:
    243 				assert(0);
    244 				break;
    245 			}
    246 			break;
    247 		/* Z.. */
    248 		case RC_SWIZZLE_Z:
    249 			/* Legal swizzles that start with Z: ZZZ, ZXY */
    250 			switch (swz1) {
    251 			/* ZZ. */
    252 			case RC_SWIZZLE_Z:
    253 				/* The new swizzle will be:
    254 				 * WZY (ZZ. => WZ. => WZY) */
    255 				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_W);
    256 				break;
    257 			/* ZX. */
    258 			case RC_SWIZZLE_X:
    259 				/* The new swizzle is ZXY */
    260 				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
    261 				found_swizzle = 1;
    262 				break;
    263 			/* ZY. */
    264 			case RC_SWIZZLE_Y:
    265 				/* ZYY */
    266 				if (swz2 == RC_SWIZZLE_Y) {
    267 					/* The new swizzle is ZXY */
    268 					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_X);
    269 					found_swizzle = 1;
    270 				} else { /* ZY[^Y] */
    271 					/* The new swizzle will be:
    272 					 * XYZ (ZY. => XY. => XYZ) */
    273 					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
    274 				}
    275 				break;
    276 			/* ZW. Should have already been handled. */
    277 			case RC_SWIZZLE_W:
    278 				assert(0);
    279 				break;
    280 			}
    281 			break;
    282 
    283 		/* W.. */
    284 		case RC_SWIZZLE_W:
    285 			/* Legal swizzles that start with X: WWW, WZY */
    286 			switch (swz1) {
    287 			/* WW. Should have already been handled. */
    288 			case RC_SWIZZLE_W:
    289 				assert(0);
    290 				break;
    291 			/* WZ. */
    292 			case RC_SWIZZLE_Z:
    293 				/* The new swizzle will be WZY */
    294 				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
    295 				found_swizzle = 1;
    296 				break;
    297 			/* WX. */
    298 			case RC_SWIZZLE_X:
    299 			/* WY. */
    300 			case RC_SWIZZLE_Y:
    301 				/* W[XY]Y */
    302 				if (swz2 == RC_SWIZZLE_Y) {
    303 					/* The new swizzle will be WZY */
    304 					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
    305 					found_swizzle = 1;
    306 				} else { /* W[XY][^Y] */
    307 					/* The new swizzle will be:
    308 					 * ZXY (WX. => XX. => ZX. => ZXY) or
    309 					 * XYZ (WY. => XY. => XYZ)
    310 					 */
    311 					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
    312 				}
    313 				break;
    314 			}
    315 			break;
    316 		/* U.. 0.. 1.. H..*/
    317 		case RC_SWIZZLE_UNUSED:
    318 		case RC_SWIZZLE_ZERO:
    319 		case RC_SWIZZLE_ONE:
    320 		case RC_SWIZZLE_HALF:
    321 			SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
    322 			break;
    323 		}
    324 	}
    325 
    326 	/* Handle the swizzle in the w channel. */
    327 	swz3 = GET_SWZ(reg->Swizzle, 3);
    328 
    329 	/* We can skip this if the swizzle in channel w is an inline constant. */
    330 	if (swz3 <= RC_SWIZZLE_W) {
    331 		for (chan = 0; chan < 3; chan++) {
    332 			unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
    333 			unsigned new_swz = GET_SWZ(new_swizzle, chan);
    334 			/* If the swizzle in the w channel is the same as the
    335 			 * swizzle in any other channels, we need to rewrite it.
    336 			 * For example:
    337 			 * reg->Swizzle == XWZW
    338 			 * new_swizzle  == XYZX
    339 			 * Since the swizzle in the y channel is being
    340 			 * rewritten from W -> Y we need to change the swizzle
    341 			 * in the w channel from W -> Y as well.
    342 			 */
    343 			if (old_swz == swz3) {
    344 				SET_SWZ(new_swizzle, 3,
    345 						GET_SWZ(new_swizzle, chan));
    346 				break;
    347 			}
    348 
    349 			/* The swizzle in channel w will be overwritten by one
    350 			 * of the new swizzles. */
    351 			if (new_swz == swz3) {
    352 				/* Find an unused swizzle */
    353 				unsigned i;
    354 				unsigned used = 0;
    355 				for (i = 0; i < 3; i++) {
    356 					used |= 1 << GET_SWZ(new_swizzle, i);
    357 				}
    358 				for (i = 0; i < 4; i++) {
    359 					if (used & (1 << i)) {
    360 						continue;
    361 					}
    362 					SET_SWZ(new_swizzle, 3, i);
    363 				}
    364 			}
    365 		}
    366 	}
    367 
    368 	for (chan = 0; chan < 4; chan++) {
    369 		unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
    370 		unsigned new_swz = GET_SWZ(new_swizzle, chan);
    371 
    372 		if (old_swz == RC_SWIZZLE_UNUSED) {
    373 			continue;
    374 		}
    375 
    376 		/* We don't need to change the swizzle in channel w if it is
    377 		 * an inline constant.  These are always legal in the w channel.
    378 		 *
    379 		 * Swizzles with a value > RC_SWIZZLE_W are inline constants.
    380 		 */
    381 		if (chan == 3 && old_swz > RC_SWIZZLE_W) {
    382 			continue;
    383 		}
    384 
    385 		assert(new_swz <= RC_SWIZZLE_W);
    386 
    387 		switch (old_swz) {
    388 		case RC_SWIZZLE_ZERO:
    389 			imms[new_swz] = 0.0f;
    390 			break;
    391 		case RC_SWIZZLE_HALF:
    392 			if (reg->Negate & (1 << chan)) {
    393 				imms[new_swz] = -0.5f;
    394 			} else {
    395 				imms[new_swz] = 0.5f;
    396 			}
    397 			break;
    398 		case RC_SWIZZLE_ONE:
    399 			if (reg->Negate & (1 << chan)) {
    400 				imms[new_swz] = -1.0f;
    401 			} else {
    402 				imms[new_swz] = 1.0f;
    403 			}
    404 			break;
    405 		default:
    406 			imms[new_swz] = rc_get_constant_value(c, reg->Index,
    407 					reg->Swizzle, reg->Negate, chan);
    408 		}
    409 		SET_SWZ(reg->Swizzle, chan, new_swz);
    410 	}
    411 	reg->Index = rc_constants_add_immediate_vec4(&c->Program.Constants,
    412 							imms);
    413 	/* We need to set the register file to CONSTANT in case we are
    414 	 * converting a non-constant register with constant swizzles (e.g.
    415 	 * ONE, ZERO, HALF).
    416 	 */
    417 	reg->File = RC_FILE_CONSTANT;
    418 	reg->Negate = 0;
    419 	return 1;
    420 }
    421 
    422 void rc_dataflow_swizzles(struct radeon_compiler * c, void *user)
    423 {
    424 	struct rc_instruction * inst;
    425 
    426 	for(inst = c->Program.Instructions.Next;
    427 					inst != &c->Program.Instructions;
    428 					inst = inst->Next) {
    429 		const struct rc_opcode_info * opcode =
    430 					rc_get_opcode_info(inst->U.I.Opcode);
    431 		unsigned int src;
    432 
    433 		for(src = 0; src < opcode->NumSrcRegs; ++src) {
    434 			struct rc_src_register *reg = &inst->U.I.SrcReg[src];
    435 			if (c->SwizzleCaps->IsNative(inst->U.I.Opcode, *reg)) {
    436 				continue;
    437 			}
    438 			if (!c->is_r500 &&
    439 			    c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS &&
    440 			    try_rewrite_constant(c, reg)) {
    441 				continue;
    442 			}
    443 			rewrite_source(c, inst, src);
    444 		}
    445 	}
    446 	if (c->Debug & RC_DBG_LOG)
    447 		rc_constants_print(&c->Program.Constants);
    448 }
    449