Home | History | Annotate | Download | only in compiler
      1 /*
      2  * Copyright (C) 2009 Nicolai Haehnle.
      3  *
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining
      7  * a copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sublicense, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial
     16  * portions of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  */
     27 
     28 #include "radeon_program_pair.h"
     29 
     30 #include "radeon_compiler.h"
     31 #include "radeon_compiler_util.h"
     32 
     33 
     34 /**
     35  * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
     36  * and reverse the order of arguments for CMP.
     37  */
     38 static void final_rewrite(struct rc_sub_instruction *inst)
     39 {
     40 	struct rc_src_register tmp;
     41 
     42 	switch(inst->Opcode) {
     43 	case RC_OPCODE_ADD:
     44 		inst->SrcReg[2] = inst->SrcReg[1];
     45 		inst->SrcReg[1].File = RC_FILE_NONE;
     46 		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
     47 		inst->SrcReg[1].Negate = RC_MASK_NONE;
     48 		inst->Opcode = RC_OPCODE_MAD;
     49 		break;
     50 	case RC_OPCODE_CMP:
     51 		tmp = inst->SrcReg[2];
     52 		inst->SrcReg[2] = inst->SrcReg[0];
     53 		inst->SrcReg[0] = tmp;
     54 		break;
     55 	case RC_OPCODE_MOV:
     56 		/* AMD say we should use CMP.
     57 		 * However, when we transform
     58 		 *  KIL -r0;
     59 		 * into
     60 		 *  CMP tmp, -r0, -r0, 0;
     61 		 *  KIL tmp;
     62 		 * we get incorrect behaviour on R500 when r0 == 0.0.
     63 		 * It appears that the R500 KIL hardware treats -0.0 as less
     64 		 * than zero.
     65 		 */
     66 		inst->SrcReg[1].File = RC_FILE_NONE;
     67 		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
     68 		inst->SrcReg[2].File = RC_FILE_NONE;
     69 		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
     70 		inst->Opcode = RC_OPCODE_MAD;
     71 		break;
     72 	case RC_OPCODE_MUL:
     73 		inst->SrcReg[2].File = RC_FILE_NONE;
     74 		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
     75 		inst->Opcode = RC_OPCODE_MAD;
     76 		break;
     77 	default:
     78 		/* nothing to do */
     79 		break;
     80 	}
     81 }
     82 
     83 
     84 /**
     85  * Classify an instruction according to which ALUs etc. it needs
     86  */
     87 static void classify_instruction(struct rc_sub_instruction * inst,
     88 	int * needrgb, int * needalpha, int * istranscendent)
     89 {
     90 	*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
     91 	*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
     92 	*istranscendent = 0;
     93 
     94 	if (inst->WriteALUResult == RC_ALURESULT_X)
     95 		*needrgb = 1;
     96 	else if (inst->WriteALUResult == RC_ALURESULT_W)
     97 		*needalpha = 1;
     98 
     99 	switch(inst->Opcode) {
    100 	case RC_OPCODE_ADD:
    101 	case RC_OPCODE_CMP:
    102 	case RC_OPCODE_CND:
    103 	case RC_OPCODE_DDX:
    104 	case RC_OPCODE_DDY:
    105 	case RC_OPCODE_FRC:
    106 	case RC_OPCODE_MAD:
    107 	case RC_OPCODE_MAX:
    108 	case RC_OPCODE_MIN:
    109 	case RC_OPCODE_MOV:
    110 	case RC_OPCODE_MUL:
    111 		break;
    112 	case RC_OPCODE_COS:
    113 	case RC_OPCODE_EX2:
    114 	case RC_OPCODE_LG2:
    115 	case RC_OPCODE_RCP:
    116 	case RC_OPCODE_RSQ:
    117 	case RC_OPCODE_SIN:
    118 		*istranscendent = 1;
    119 		*needalpha = 1;
    120 		break;
    121 	case RC_OPCODE_DP4:
    122 		*needalpha = 1;
    123 		/* fall through */
    124 	case RC_OPCODE_DP3:
    125 		*needrgb = 1;
    126 		break;
    127 	default:
    128 		break;
    129 	}
    130 }
    131 
    132 static void src_uses(struct rc_src_register src, unsigned int * rgb,
    133 							unsigned int * alpha)
    134 {
    135 	int j;
    136 	for(j = 0; j < 4; ++j) {
    137 		unsigned int swz = GET_SWZ(src.Swizzle, j);
    138 		if (swz < 3)
    139 			*rgb = 1;
    140 		else if (swz < 4)
    141 			*alpha = 1;
    142 	}
    143 }
    144 
    145 /**
    146  * Fill the given ALU instruction's opcodes and source operands into the given pair,
    147  * if possible.
    148  */
    149 static void set_pair_instruction(struct r300_fragment_program_compiler *c,
    150 	struct rc_pair_instruction * pair,
    151 	struct rc_sub_instruction * inst)
    152 {
    153 	int needrgb, needalpha, istranscendent;
    154 	const struct rc_opcode_info * opcode;
    155 	int i;
    156 
    157 	memset(pair, 0, sizeof(struct rc_pair_instruction));
    158 
    159 	classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
    160 
    161 	if (needrgb) {
    162 		if (istranscendent)
    163 			pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
    164 		else
    165 			pair->RGB.Opcode = inst->Opcode;
    166 		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
    167 			pair->RGB.Saturate = 1;
    168 	}
    169 	if (needalpha) {
    170 		pair->Alpha.Opcode = inst->Opcode;
    171 		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
    172 			pair->Alpha.Saturate = 1;
    173 	}
    174 
    175 	opcode = rc_get_opcode_info(inst->Opcode);
    176 
    177 	/* Presubtract handling:
    178 	 * We need to make sure that the values used by the presubtract
    179 	 * operation end up in src0 or src1. */
    180 	if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
    181 		/* rc_pair_alloc_source() will fill in data for
    182 		 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
    183 		int j;
    184 		for(j = 0; j < 3; j++) {
    185 			int src_regs;
    186 			if(inst->SrcReg[j].File != RC_FILE_PRESUB)
    187 				continue;
    188 
    189 			src_regs = rc_presubtract_src_reg_count(
    190 							inst->PreSub.Opcode);
    191 			for(i = 0; i < src_regs; i++) {
    192 				unsigned int rgb = 0;
    193 				unsigned int alpha = 0;
    194 				src_uses(inst->SrcReg[j], &rgb, &alpha);
    195 				if(rgb) {
    196 					pair->RGB.Src[i].File =
    197 						inst->PreSub.SrcReg[i].File;
    198 					pair->RGB.Src[i].Index =
    199 						inst->PreSub.SrcReg[i].Index;
    200 					pair->RGB.Src[i].Used = 1;
    201 				}
    202 				if(alpha) {
    203 					pair->Alpha.Src[i].File =
    204 						inst->PreSub.SrcReg[i].File;
    205 					pair->Alpha.Src[i].Index =
    206 						inst->PreSub.SrcReg[i].Index;
    207 					pair->Alpha.Src[i].Used = 1;
    208 				}
    209 			}
    210 		}
    211 	}
    212 
    213 	for(i = 0; i < opcode->NumSrcRegs; ++i) {
    214 		int source;
    215 		if (needrgb && !istranscendent) {
    216 			unsigned int srcrgb = 0;
    217 			unsigned int srcalpha = 0;
    218 			unsigned int srcmask = 0;
    219 			int j;
    220 			/* We don't care about the alpha channel here.  We only
    221 			 * want the part of the swizzle that writes to rgb,
    222 			 * since we are creating an rgb instruction. */
    223 			for(j = 0; j < 3; ++j) {
    224 				unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
    225 
    226 				if (swz < RC_SWIZZLE_W)
    227 					srcrgb = 1;
    228 				else if (swz == RC_SWIZZLE_W)
    229 					srcalpha = 1;
    230 
    231 				if (swz < RC_SWIZZLE_UNUSED)
    232 					srcmask |= 1 << j;
    233 			}
    234 			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
    235 							inst->SrcReg[i].File, inst->SrcReg[i].Index);
    236 			if (source < 0) {
    237 				rc_error(&c->Base, "Failed to translate "
    238 							"rgb instruction.\n");
    239 				return;
    240 			}
    241 			pair->RGB.Arg[i].Source = source;
    242 			pair->RGB.Arg[i].Swizzle =
    243 				rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
    244 			pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
    245 			pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
    246 		}
    247 		if (needalpha) {
    248 			unsigned int srcrgb = 0;
    249 			unsigned int srcalpha = 0;
    250 			unsigned int swz;
    251 			if (istranscendent) {
    252 				swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
    253 			} else {
    254 				swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
    255 			}
    256 
    257 			if (swz < 3)
    258 				srcrgb = 1;
    259 			else if (swz < 4)
    260 				srcalpha = 1;
    261 			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
    262 							inst->SrcReg[i].File, inst->SrcReg[i].Index);
    263 			if (source < 0) {
    264 				rc_error(&c->Base, "Failed to translate "
    265 							"alpha instruction.\n");
    266 				return;
    267 			}
    268 			pair->Alpha.Arg[i].Source = source;
    269 			pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
    270 			pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
    271 
    272 			if (istranscendent) {
    273 				pair->Alpha.Arg[i].Negate =
    274 					!!(inst->SrcReg[i].Negate &
    275 							inst->DstReg.WriteMask);
    276 			} else {
    277 				pair->Alpha.Arg[i].Negate =
    278 					!!(inst->SrcReg[i].Negate & RC_MASK_W);
    279 			}
    280 		}
    281 	}
    282 
    283 	/* Destination handling */
    284 	if (inst->DstReg.File == RC_FILE_OUTPUT) {
    285         if (inst->DstReg.Index == c->OutputDepth) {
    286             pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
    287         } else {
    288             for (i = 0; i < 4; i++) {
    289                 if (inst->DstReg.Index == c->OutputColor[i]) {
    290                     pair->RGB.Target = i;
    291                     pair->Alpha.Target = i;
    292                     pair->RGB.OutputWriteMask |=
    293                         inst->DstReg.WriteMask & RC_MASK_XYZ;
    294                     pair->Alpha.OutputWriteMask |=
    295                         GET_BIT(inst->DstReg.WriteMask, 3);
    296                     break;
    297                 }
    298             }
    299         }
    300 	} else {
    301 		if (needrgb) {
    302 			pair->RGB.DestIndex = inst->DstReg.Index;
    303 			pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
    304 		}
    305 
    306 		if (needalpha) {
    307 			pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
    308 			if (pair->Alpha.WriteMask) {
    309 				pair->Alpha.DestIndex = inst->DstReg.Index;
    310 			}
    311 		}
    312 	}
    313 
    314 	if (needrgb) {
    315 		pair->RGB.Omod = inst->Omod;
    316 	}
    317 	if (needalpha) {
    318 		pair->Alpha.Omod = inst->Omod;
    319 	}
    320 
    321 	if (inst->WriteALUResult) {
    322 		pair->WriteALUResult = inst->WriteALUResult;
    323 		pair->ALUResultCompare = inst->ALUResultCompare;
    324 	}
    325 }
    326 
    327 
    328 static void check_opcode_support(struct r300_fragment_program_compiler *c,
    329 				 struct rc_sub_instruction *inst)
    330 {
    331 	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
    332 
    333 	if (opcode->HasDstReg) {
    334 		if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
    335 			rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
    336 			return;
    337 		}
    338 	}
    339 
    340 	for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
    341 		if (inst->SrcReg[i].RelAddr) {
    342 			rc_error(&c->Base, "Fragment program does not support relative addressing "
    343 				 " of source operands.\n");
    344 			return;
    345 		}
    346 	}
    347 }
    348 
    349 
    350 /**
    351  * Translate all ALU instructions into corresponding pair instructions,
    352  * performing no other changes.
    353  */
    354 void rc_pair_translate(struct radeon_compiler *cc, void *user)
    355 {
    356 	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
    357 
    358 	for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
    359 	    inst != &c->Base.Program.Instructions;
    360 	    inst = inst->Next) {
    361 		const struct rc_opcode_info * opcode;
    362 		struct rc_sub_instruction copy;
    363 
    364 		if (inst->Type != RC_INSTRUCTION_NORMAL)
    365 			continue;
    366 
    367 		opcode = rc_get_opcode_info(inst->U.I.Opcode);
    368 
    369 		if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
    370 			continue;
    371 
    372 		copy = inst->U.I;
    373 
    374 		check_opcode_support(c, &copy);
    375 
    376 		final_rewrite(&copy);
    377 		inst->Type = RC_INSTRUCTION_PAIR;
    378 		set_pair_instruction(c, &inst->U.P, &copy);
    379 	}
    380 }
    381