Home | History | Annotate | Download | only in gd3d1x
      1 /**************************************************************************
      2  *
      3  * Copyright 2010 Luca Barbieri
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining
      6  * a copy of this software and associated documentation files (the
      7  * "Software"), to deal in the Software without restriction, including
      8  * without limitation the rights to use, copy, modify, merge, publish,
      9  * distribute, sublicense, and/or sell copies of the Software, and to
     10  * permit persons to whom the Software is furnished to do so, subject to
     11  * the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the
     14  * next paragraph) shall be included in all copies or substantial
     15  * portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     20  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     21  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     22  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     24  *
     25  **************************************************************************/
     26 
     27 #include <d3d11shader.h>
     28 #include "d3d1xstutil.h"
     29 #include "sm4.h"
     30 #include "tgsi/tgsi_ureg.h"
     31 #include <vector>
     32 
     33 #if 1
     34 #define check(x) assert(x)
     35 #define fail(x) assert(0 && (x))
     36 #else
     37 #define check(x) do {if(!(x)) throw(#x);} while(0)
     38 #define fail(x) throw(x)
     39 #endif
     40 
     41 struct tgsi_interpolation
     42 {
     43 	unsigned interpolation;
     44 	bool centroid;
     45 };
     46 
     47 static tgsi_interpolation sm4_to_pipe_interpolation[] =
     48 {
     49 	{TGSI_INTERPOLATE_PERSPECTIVE, false}, /* UNDEFINED */
     50 	{TGSI_INTERPOLATE_CONSTANT, false},
     51 	{TGSI_INTERPOLATE_PERSPECTIVE, false}, /* LINEAR */
     52 	{TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_CENTROID */
     53 	{TGSI_INTERPOLATE_LINEAR, false}, /* LINEAR_NOPERSPECTIVE */
     54 	{TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_CENTROID */
     55 
     56 	// Added in D3D10.1
     57 	{TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_SAMPLE */
     58 	{TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_SAMPLE */
     59 };
     60 
     61 static int sm4_to_pipe_sv[] =
     62 {
     63 	-1,
     64 	TGSI_SEMANTIC_POSITION,
     65 	-1, /*TGSI_SEMANTIC_CLIP_DISTANCE */
     66 	-1, /*TGSI_SEMANTIC_CULL_DISTANCE */
     67 	-1, /*TGSI_SEMANTIC_RENDER_TARGET_ARRAY_INDEX */
     68 	-1, /*TGSI_SEMANTIC_VIEWPORT_ARRAY_INDEX */
     69 	-1, /*TGSI_SEMANTIC_VERTEXID,*/
     70 	TGSI_SEMANTIC_PRIMID,
     71 	TGSI_SEMANTIC_INSTANCEID,
     72 	TGSI_SEMANTIC_FACE,
     73 	-1, /*TGSI_SEMANTIC_SAMPLE_INDEX*/
     74 };
     75 
     76 struct sm4_to_tgsi_converter
     77 {
     78 	struct ureg_program* ureg;
     79 	std::vector<struct ureg_dst> temps;
     80 	std::vector<struct ureg_dst> outputs;
     81 	std::vector<struct ureg_src> inputs;
     82 	std::vector<struct ureg_src> resources;
     83 	std::vector<struct ureg_src> samplers;
     84 	std::vector<std::pair<unsigned, unsigned> > targets; // first is normal, second shadow/comparison
     85 	std::vector<unsigned> sampler_modes; // 0 = normal, 1 = shadow/comparison
     86 	std::vector<std::pair<unsigned, unsigned> > loops;
     87 	sm4_insn* insn;
     88 	struct sm4_program& program;
     89 	std::vector<unsigned> sm4_to_tgsi_insn_num;
     90 	std::vector<std::pair<unsigned, unsigned> > label_to_sm4_insn_num;
     91 	bool in_sub;
     92 	bool avoid_txf;
     93 	bool avoid_int;
     94 
     95 	sm4_to_tgsi_converter(struct sm4_program& program)
     96 	: program(program)
     97 	{
     98 		avoid_txf = true;
     99 		avoid_int = false;
    100 	}
    101 
    102 	struct ureg_dst _reg(sm4_op& op)
    103 	{
    104 		switch(op.file)
    105 		{
    106 		case SM4_FILE_NULL:
    107 		{
    108 			struct ureg_dst d;
    109 			memset(&d, 0, sizeof(d));
    110 			d.File = TGSI_FILE_NULL;
    111 			return d;
    112 		}
    113 		case SM4_FILE_TEMP:
    114 			check(op.has_simple_index());
    115 			check(op.indices[0].disp < temps.size());
    116 			return temps[op.indices[0].disp];
    117 		case SM4_FILE_OUTPUT:
    118 			check(op.has_simple_index());
    119 			check(op.indices[0].disp < outputs.size());
    120 			return outputs[op.indices[0].disp];
    121 		default:
    122 			check(0);
    123 			return ureg_dst_undef();
    124 		}
    125 	}
    126 
    127 	struct ureg_dst _dst(unsigned i = 0)
    128 	{
    129 		check(i < insn->num_ops);
    130 		sm4_op& op = *insn->ops[i];
    131 		check(op.mode == SM4_OPERAND_MODE_MASK || op.mode == SM4_OPERAND_MODE_SCALAR);
    132 		struct ureg_dst d = ureg_writemask(_reg(op), op.mask);
    133 		if(insn->insn.sat)
    134 			d = ureg_saturate(d);
    135 		return d;
    136 	}
    137 
    138 	struct ureg_src _src(unsigned i)
    139 	{
    140 		check(i < insn->num_ops);
    141 		sm4_op& op = *insn->ops[i];
    142 		struct ureg_src s;
    143 		switch(op.file)
    144 		{
    145 		case SM4_FILE_IMMEDIATE32:
    146 			s = ureg_imm4f(ureg, op.imm_values[0].f32, op.imm_values[1].f32, op.imm_values[2].f32, op.imm_values[3].f32);
    147 			break;
    148 		case SM4_FILE_INPUT:
    149 			check(op.is_index_simple(0));
    150 			check(op.num_indices == 1 || op.num_indices == 2);
    151 			// TODO: is this correct, or are incorrectly swapping the two indices in the GS case?
    152 			check(op.indices[op.num_indices - 1].disp < inputs.size());
    153 			s = inputs[op.indices[op.num_indices - 1].disp];
    154 			if(op.num_indices == 2)
    155 			{
    156 				s.Dimension = 1;
    157 				s.DimensionIndex = op.indices[0].disp;
    158 			}
    159 			break;
    160 		case SM4_FILE_CONSTANT_BUFFER:
    161 			// TODO: indirect addressing
    162 			check(op.num_indices == 2);
    163 			check(op.is_index_simple(0));
    164 			check(op.is_index_simple(1));
    165 			s = ureg_src_register(TGSI_FILE_CONSTANT, (unsigned)op.indices[1].disp);
    166 			s.Dimension = 1;
    167 			s.DimensionIndex = op.indices[0].disp;
    168 			break;
    169 		default:
    170 			s = ureg_src(_reg(op));
    171 			break;
    172 		}
    173 		if(op.mode == SM4_OPERAND_MODE_SWIZZLE || op.mode == SM4_OPERAND_MODE_SCALAR)
    174 			s = ureg_swizzle(s, op.swizzle[0], op.swizzle[1], op.swizzle[2], op.swizzle[3]);
    175 		else
    176 		{
    177 			/* immediates are masked to show needed values */
    178 			check(op.file == SM4_FILE_IMMEDIATE32 || op.file == SM4_FILE_IMMEDIATE64);
    179 		}
    180 		if(op.abs)
    181 			s = ureg_abs(s);
    182 		if(op.neg)
    183 			s = ureg_negate(s);
    184 		return s;
    185 	};
    186 
    187 	int _idx(sm4_file file, unsigned i = 0)
    188 	{
    189 		check(i < insn->num_ops);
    190 		sm4_op& op = *insn->ops[i];
    191 		check(op.file == file);
    192 		check(op.has_simple_index());
    193 		return (int)op.indices[0].disp;
    194 	}
    195 
    196 	unsigned tex_target(unsigned resource, unsigned sampler)
    197 	{
    198 		unsigned shadow = sampler_modes[sampler];
    199 		unsigned target = shadow ? targets[resource].second : targets[resource].first;
    200 		check(target);
    201 		return target;
    202 	}
    203 
    204 	enum pipe_type res_return_type(unsigned type)
    205 	{
    206 		switch(type)
    207 		{
    208 		case D3D_RETURN_TYPE_UNORM: return PIPE_TYPE_UNORM;
    209 		case D3D_RETURN_TYPE_SNORM: return PIPE_TYPE_SNORM;
    210 		case D3D_RETURN_TYPE_SINT:  return PIPE_TYPE_SINT;
    211 		case D3D_RETURN_TYPE_UINT:  return PIPE_TYPE_UINT;
    212 		case D3D_RETURN_TYPE_FLOAT: return PIPE_TYPE_FLOAT;
    213 		default:
    214 			fail("invalid resource return type");
    215 			return PIPE_TYPE_FLOAT;
    216 		}
    217 	}
    218 
    219 	std::vector<struct ureg_dst> insn_tmps;
    220 
    221 	struct ureg_dst _tmp()
    222 	{
    223 		struct ureg_dst t = ureg_DECL_temporary(ureg);
    224 		insn_tmps.push_back(t);
    225 		return t;
    226 	}
    227 
    228 	struct ureg_dst _tmp(struct ureg_dst d)
    229 	{
    230 		if(d.File == TGSI_FILE_TEMPORARY)
    231 			return d;
    232 		else
    233 			return ureg_writemask(_tmp(), d.WriteMask);
    234 	}
    235 
    236 #define OP1_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1)); break
    237 #define OP2_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2)); break
    238 #define OP3_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2), _src(3)); break
    239 #define OP1(n) OP1_(n, n)
    240 #define OP2(n) OP2_(n, n)
    241 #define OP3(n) OP3_(n, n)
    242 #define OP_CF(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, &label); label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); break;
    243 
    244 	void translate_insns(unsigned begin, unsigned end)
    245 	{
    246 		for(unsigned insn_num = begin; insn_num < end; ++insn_num)
    247 		{
    248 			sm4_to_tgsi_insn_num[insn_num] = ureg_get_instruction_number(ureg);
    249 			unsigned label;
    250 			insn = program.insns[insn_num];
    251 			bool ok;
    252 			ok = true;
    253 			switch(insn->opcode)
    254 			{
    255 			// trivial instructions
    256 			case SM4_OPCODE_NOP:
    257 				break;
    258 			OP1(MOV);
    259 
    260 			// float
    261 			OP2(ADD);
    262 			OP2(MUL);
    263 			OP3(MAD);
    264 			OP2(DIV);
    265 			OP1(FRC);
    266 			OP1(RCP);
    267 			OP2(MIN);
    268 			OP2(MAX);
    269 			OP2_(LT, SLT);
    270 			OP2_(GE, SGE);
    271 			OP2_(EQ, SEQ);
    272 			OP2_(NE, SNE);
    273 
    274 			// bitwise
    275 			OP1(NOT);
    276 			OP2(AND);
    277 			OP2(OR);
    278 			OP2(XOR);
    279 
    280 			// special mathematical
    281 			OP2(DP2);
    282 			OP2(DP3);
    283 			OP2(DP4);
    284 			OP1(RSQ);
    285 			OP1_(LOG, LG2);
    286 			OP1_(EXP, EX2);
    287 
    288 			// rounding
    289 			OP1_(ROUND_NE, ROUND);
    290 			OP1_(ROUND_Z, TRUNC);
    291 			OP1_(ROUND_PI, CEIL);
    292 			OP1_(ROUND_NI, FLR);
    293 
    294 			// cross-thread
    295 			OP1_(DERIV_RTX, DDX);
    296 			OP1_(DERIV_RTX_COARSE, DDX);
    297 			OP1_(DERIV_RTX_FINE, DDX);
    298 			OP1_(DERIV_RTY, DDY);
    299 			OP1_(DERIV_RTY_COARSE, DDY);
    300 			OP1_(DERIV_RTY_FINE, DDY);
    301 			case SM4_OPCODE_EMIT:
    302 				ureg_EMIT(ureg);
    303 				break;
    304 			case SM4_OPCODE_CUT:
    305 				ureg_ENDPRIM(ureg);
    306 				break;
    307 			case SM4_OPCODE_EMITTHENCUT:
    308 				ureg_EMIT(ureg);
    309 				ureg_ENDPRIM(ureg);
    310 				break;
    311 
    312 			// non-trivial instructions
    313 			case SM4_OPCODE_MOVC:
    314 				/* CMP checks for < 0, but MOVC checks for != 0
    315 				 * but fortunately, x != 0 is equivalent to -abs(x) < 0
    316 				 * XXX: can test_nz apply to this?!
    317 				 */
    318 				ureg_CMP(ureg, _dst(), ureg_negate(ureg_abs(_src(1))), _src(2), _src(3));
    319 				break;
    320 			case SM4_OPCODE_SQRT:
    321 			{
    322 				struct ureg_dst d = _dst();
    323 				struct ureg_dst t = _tmp(d);
    324 				ureg_RSQ(ureg, t, _src(1));
    325 				ureg_RCP(ureg, d, ureg_src(t));
    326 				break;
    327 			}
    328 			case SM4_OPCODE_SINCOS:
    329 			{
    330 				struct ureg_dst s = _dst(0);
    331 				struct ureg_dst c = _dst(1);
    332 				struct ureg_src v = _src(2);
    333 				if(s.File != TGSI_FILE_NULL)
    334 					ureg_SIN(ureg, s, v);
    335 				if(c.File != TGSI_FILE_NULL)
    336 					ureg_COS(ureg, c, v);
    337 				break;
    338 			}
    339 
    340 			// control flow
    341 			case SM4_OPCODE_DISCARD:
    342 				ureg_KIL(ureg, _src(0));
    343 				break;
    344 			OP_CF(LOOP, BGNLOOP);
    345 			OP_CF(ENDLOOP, ENDLOOP);
    346 			case SM4_OPCODE_BREAK:
    347 				ureg_BRK(ureg);
    348 				break;
    349 			case SM4_OPCODE_BREAKC:
    350 				// XXX: can test_nz apply to this?!
    351 				ureg_BREAKC(ureg, _src(0));
    352 				break;
    353 			case SM4_OPCODE_CONTINUE:
    354 				ureg_CONT(ureg);
    355 				break;
    356 			case SM4_OPCODE_CONTINUEC:
    357 				// XXX: can test_nz apply to this?!
    358 				ureg_IF(ureg, _src(0), &label);
    359 				ureg_CONT(ureg);
    360 				ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
    361 				ureg_ENDIF(ureg);
    362 				break;
    363 			case SM4_OPCODE_SWITCH:
    364 				ureg_SWITCH(ureg, _src(0));
    365 				break;
    366 			case SM4_OPCODE_CASE:
    367 				ureg_CASE(ureg, _src(0));
    368 				break;
    369 			case SM4_OPCODE_DEFAULT:
    370 				ureg_DEFAULT(ureg);
    371 				break;
    372 			case SM4_OPCODE_ENDSWITCH:
    373 				ureg_ENDSWITCH(ureg);
    374 				break;
    375 			case SM4_OPCODE_CALL:
    376 				ureg_CAL(ureg, &label);
    377 				label_to_sm4_insn_num.push_back(std::make_pair(label, program.label_to_insn_num[_idx(SM4_FILE_LABEL)]));
    378 				break;
    379 			case SM4_OPCODE_LABEL:
    380 				if(in_sub)
    381 					ureg_ENDSUB(ureg);
    382 				else
    383 					ureg_END(ureg);
    384 				ureg_BGNSUB(ureg);
    385 				in_sub = true;
    386 				break;
    387 			case SM4_OPCODE_RET:
    388 				if(in_sub || insn_num != (program.insns.size() - 1))
    389 					ureg_RET(ureg);
    390 				break;
    391 			case SM4_OPCODE_RETC:
    392 				ureg_IF(ureg, _src(0), &label);
    393 				if(insn->insn.test_nz)
    394 					ureg_RET(ureg);
    395 				ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
    396 				if(!insn->insn.test_nz)
    397 				{
    398 					ureg_ELSE(ureg, &label);
    399 					ureg_RET(ureg);
    400 					ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
    401 				}
    402 				ureg_ENDIF(ureg);
    403 				break;
    404 			OP_CF(ELSE, ELSE);
    405 			case SM4_OPCODE_ENDIF:
    406 				ureg_ENDIF(ureg);
    407 				break;
    408 			case SM4_OPCODE_IF:
    409 				if(insn->insn.test_nz)
    410 				{
    411 					ureg_IF(ureg, _src(0), &label);
    412 					label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num]));
    413 				}
    414 				else
    415 				{
    416 					unsigned linked = program.cf_insn_linked[insn_num];
    417 					if(program.insns[linked]->opcode == SM4_OPCODE_ENDIF)
    418 					{
    419 						ureg_IF(ureg, _src(0), &label);
    420 						ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
    421 						ureg_ELSE(ureg, &label);
    422 						label_to_sm4_insn_num.push_back(std::make_pair(label, linked));
    423 					}
    424 					else
    425 					{
    426 						/* we have to swap the branches in this case (fun!)
    427 						 * TODO: maybe just emit a SEQ 0?
    428 						 * */
    429 						unsigned endif = program.cf_insn_linked[linked];
    430 
    431 						ureg_IF(ureg, _src(0), &label);
    432 						label_to_sm4_insn_num.push_back(std::make_pair(label, linked));
    433 
    434 						translate_insns(linked + 1, endif);
    435 
    436 						sm4_to_tgsi_insn_num[linked] = ureg_get_instruction_number(ureg);
    437 						ureg_ELSE(ureg, &label);
    438 						label_to_sm4_insn_num.push_back(std::make_pair(label, endif));
    439 
    440 						translate_insns(insn_num + 1, linked);
    441 
    442 						insn_num = endif - 1;
    443 						goto next;
    444 					}
    445 				}
    446 				break;
    447 			case SM4_OPCODE_RESINFO:
    448 				// TODO: return type
    449 				ureg_SVIEWINFO(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]);
    450 				break;
    451 			// TODO: sample index, texture offset
    452 			case SM4_OPCODE_LD: // dst, coord_int, res; mipmap level in last coord_int arg
    453 				ureg_LOAD(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]);
    454 				break;
    455 			case SM4_OPCODE_LD_MS:
    456 				ureg_LOAD_MS(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]);
    457 				break;
    458 			case SM4_OPCODE_SAMPLE: // dst, coord, res, samp
    459 				ureg_SAMPLE(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)]);
    460 				break;
    461 			case SM4_OPCODE_SAMPLE_B: // dst, coord, res, samp, bias.x
    462 				ureg_SAMPLE_B(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4));
    463 				break;
    464 			case SM4_OPCODE_SAMPLE_C: // dst, coord, res, samp, comp.x
    465 				ureg_SAMPLE_C(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4));
    466 				break;
    467 			case SM4_OPCODE_SAMPLE_C_LZ: // dst, coord, res, samp, comp.x
    468 				ureg_SAMPLE_C_LZ(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4));
    469 				break;
    470 			case SM4_OPCODE_SAMPLE_D: // dst, coord, res, samp, ddx, ddy
    471 				ureg_SAMPLE_D(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4), _src(5));
    472 				break;
    473 			case SM4_OPCODE_SAMPLE_L: // dst, coord, res, samp, bias.x
    474 			{
    475 				struct ureg_dst tmp = _tmp();
    476 				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1));
    477 				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0));
    478 				ureg_SAMPLE_L(ureg, _dst(), ureg_src(tmp), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)]);
    479 				break;
    480 			}
    481 			default:
    482 				ok = false;
    483 				break;
    484 			}
    485 
    486 			if(!ok && !avoid_int)
    487 			{
    488 				ok = true;
    489 				switch(insn->opcode)
    490 				{
    491 				// integer
    492 				OP1_(ITOF, I2F);
    493 				OP1_(FTOI, F2I);
    494 				OP2_(IADD, UADD);
    495 				OP1(INEG);
    496 				OP2_(IMUL, UMUL);
    497 				OP3_(IMAD, UMAD);
    498 				OP2_(ISHL, SHL);
    499 				OP2_(ISHR, ISHR);
    500 				OP2(IMIN);
    501 				OP2(IMAX);
    502 				OP2_(ILT, ISLT);
    503 				OP2_(IGE, ISGE);
    504 				OP2_(IEQ, USEQ);
    505 				OP2_(INE, USNE);
    506 
    507 				// unsigned
    508 				OP1_(UTOF, U2F);
    509 				OP1_(FTOU, F2U);
    510 				OP2(UMUL);
    511 				OP3(UMAD);
    512 				OP2(UMIN);
    513 				OP2(UMAX);
    514 				OP2_(ULT, USLT);
    515 				OP2_(UGE, USGE);
    516 				OP2(USHR);
    517 
    518 				case SM4_OPCODE_UDIV:
    519 				{
    520 					struct ureg_dst q = _dst(0);
    521 					struct ureg_dst r = _dst(1);
    522 					struct ureg_src a = _src(2);
    523 					struct ureg_src b = _src(3);
    524 					if(q.File != TGSI_FILE_NULL)
    525 						ureg_UDIV(ureg, q, a, b);
    526 					if(r.File != TGSI_FILE_NULL)
    527 						ureg_UMOD(ureg, r, a, b);
    528 					break;
    529 				}
    530 				default:
    531 					ok = false;
    532 				}
    533 			}
    534 
    535 			if(!ok && avoid_int)
    536 			{
    537 				ok = true;
    538 				switch(insn->opcode)
    539 				{
    540 				case SM4_OPCODE_ITOF:
    541 				case SM4_OPCODE_UTOF:
    542 					break;
    543 				OP1_(FTOI, TRUNC);
    544 				OP1_(FTOU, FLR);
    545 				// integer
    546 				OP2_(IADD, ADD);
    547 				OP2_(IMUL, MUL);
    548 				OP3_(IMAD, MAD);
    549 				OP2_(MIN, MIN);
    550 				OP2_(MAX, MAX);
    551 				OP2_(ILT, SLT);
    552 				OP2_(IGE, SGE);
    553 				OP2_(IEQ, SEQ);
    554 				OP2_(INE, SNE);
    555 
    556 				// unsigned
    557 				OP2_(UMUL, MUL);
    558 				OP3_(UMAD, MAD);
    559 				OP2_(UMIN, MIN);
    560 				OP2_(UMAX, MAX);
    561 				OP2_(ULT, SLT);
    562 				OP2_(UGE, SGE);
    563 
    564 				case SM4_OPCODE_INEG:
    565 					ureg_MOV(ureg, _dst(), ureg_negate(_src(1)));
    566 					break;
    567 				case SM4_OPCODE_ISHL:
    568 				{
    569 					struct ureg_dst d = _dst();
    570 					struct ureg_dst t = _tmp(d);
    571 					ureg_EX2(ureg, t, _src(2));
    572 					ureg_MUL(ureg, d, ureg_src(t), _src(1));
    573 					break;
    574 				}
    575 				case SM4_OPCODE_ISHR:
    576 				case SM4_OPCODE_USHR:
    577 				{
    578 					struct ureg_dst d = _dst();
    579 					struct ureg_dst t = _tmp(d);
    580 					ureg_EX2(ureg, t, ureg_negate(_src(2)));
    581 					ureg_MUL(ureg, t, ureg_src(t), _src(1));
    582 					ureg_FLR(ureg, d, ureg_src(t));
    583 					break;
    584 				}
    585 				case SM4_OPCODE_UDIV:
    586 				{
    587 					struct ureg_dst q = _dst(0);
    588 					struct ureg_dst r = _dst(1);
    589 					struct ureg_src a = _src(2);
    590 					struct ureg_src b = _src(3);
    591 					struct ureg_dst f = _tmp();
    592 					ureg_DIV(ureg, f, a, b);
    593 					if(q.File != TGSI_FILE_NULL)
    594 						ureg_FLR(ureg, q, ureg_src(f));
    595 					if(r.File != TGSI_FILE_NULL)
    596 					{
    597 						ureg_FRC(ureg, f, ureg_src(f));
    598 						ureg_MUL(ureg, r, ureg_src(f), b);
    599 					}
    600 					break;
    601 				}
    602 				default:
    603 					ok = false;
    604 				}
    605 			}
    606 
    607 			check(ok);
    608 
    609 			if(!insn_tmps.empty())
    610 			{
    611 				for(unsigned i = 0; i < insn_tmps.size(); ++i)
    612 					ureg_release_temporary(ureg, insn_tmps[i]);
    613 				insn_tmps.clear();
    614 			}
    615 next:;
    616 		}
    617 	}
    618 
    619 	void* do_translate()
    620 	{
    621 		unsigned processor;
    622 		switch(program.version.type)
    623 		{
    624 		case 0:
    625 			processor = TGSI_PROCESSOR_FRAGMENT;
    626 			break;
    627 		case 1:
    628 			processor = TGSI_PROCESSOR_VERTEX;
    629 			break;
    630 		case 2:
    631 			processor = TGSI_PROCESSOR_GEOMETRY;
    632 			break;
    633 		default:
    634 			fail("Tessellation and compute shaders not yet supported");
    635 			return 0;
    636 		}
    637 
    638 		if(!sm4_link_cf_insns(program))
    639 			fail("Malformed control flow");
    640 		if(!sm4_find_labels(program))
    641 			fail("Failed to locate labels");
    642 
    643 		ureg = ureg_create(processor);
    644 
    645 		in_sub = false;
    646 
    647 		sm4_to_tgsi_insn_num.resize(program.insns.size());
    648 		for(unsigned insn_num = 0; insn_num < program.dcls.size(); ++insn_num)
    649 		{
    650 			sm4_dcl& dcl = *program.dcls[insn_num];
    651 			int idx = -1;
    652 			if(dcl.op.get() && dcl.op->is_index_simple(0))
    653 				idx = dcl.op->indices[0].disp;
    654 			switch(dcl.opcode)
    655 			{
    656 			case SM4_OPCODE_DCL_GLOBAL_FLAGS:
    657 				break;
    658 			case SM4_OPCODE_DCL_TEMPS:
    659 				for(unsigned i = 0; i < dcl.num; ++i)
    660 					temps.push_back(ureg_DECL_temporary(ureg));
    661 				break;
    662 			case SM4_OPCODE_DCL_INPUT:
    663 				check(idx >= 0);
    664 				if(processor == TGSI_PROCESSOR_VERTEX)
    665 				{
    666 					if(inputs.size() <= (unsigned)idx)
    667 						inputs.resize(idx + 1);
    668 					inputs[idx] = ureg_DECL_vs_input(ureg, idx);
    669 				}
    670 				else if(processor == TGSI_PROCESSOR_GEOMETRY)
    671 				{
    672 					// TODO: is this correct?
    673 					unsigned gsidx = dcl.op->indices[1].disp;
    674 					if(inputs.size() <= (unsigned)gsidx)
    675 						inputs.resize(gsidx + 1);
    676 					inputs[gsidx] = ureg_DECL_gs_input(ureg, gsidx, TGSI_SEMANTIC_GENERIC, gsidx);
    677 				}
    678 				else
    679 					check(0);
    680 				break;
    681 			case SM4_OPCODE_DCL_INPUT_PS:
    682 				check(idx >= 0);
    683 				if(inputs.size() <= (unsigned)idx)
    684 					inputs.resize(idx + 1);
    685 				inputs[idx] = ureg_DECL_fs_input_cyl_centroid(ureg, TGSI_SEMANTIC_GENERIC, idx, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].interpolation, 0, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].centroid);
    686 				break;
    687 			case SM4_OPCODE_DCL_OUTPUT:
    688 				check(idx >= 0);
    689 				if(outputs.size() <= (unsigned)idx)
    690 					outputs.resize(idx + 1);
    691 				if(processor == TGSI_PROCESSOR_FRAGMENT)
    692 					outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, idx);
    693 				else
    694 					outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, idx);
    695 				break;
    696 			case SM4_OPCODE_DCL_INPUT_SIV:
    697 			case SM4_OPCODE_DCL_INPUT_SGV:
    698 			case SM4_OPCODE_DCL_INPUT_PS_SIV:
    699 			case SM4_OPCODE_DCL_INPUT_PS_SGV:
    700 				check(idx >= 0);
    701 				if(inputs.size() <= (unsigned)idx)
    702 					inputs.resize(idx + 1);
    703 				// TODO: is this correct?
    704 				inputs[idx] = ureg_DECL_system_value(ureg, idx, sm4_to_pipe_sv[dcl.sv], 0);
    705 				break;
    706 			case SM4_OPCODE_DCL_OUTPUT_SIV:
    707 			case SM4_OPCODE_DCL_OUTPUT_SGV:
    708 				check(idx >= 0);
    709 				if(outputs.size() <= (unsigned)idx)
    710 					outputs.resize(idx + 1);
    711 				check(sm4_to_pipe_sv[dcl.sv] >= 0);
    712 				outputs[idx] = ureg_DECL_output(ureg, sm4_to_pipe_sv[dcl.sv], 0);
    713 				break;
    714 			case SM4_OPCODE_DCL_RESOURCE:
    715 				check(idx >= 0);
    716 				if(targets.size() <= (unsigned)idx)
    717 					targets.resize(idx + 1);
    718 				switch(dcl.dcl_resource.target)
    719 				{
    720 				case SM4_TARGET_TEXTURE1D:
    721 					targets[idx].first = TGSI_TEXTURE_1D;
    722 					targets[idx].second = TGSI_TEXTURE_SHADOW1D;
    723 					break;
    724 				case SM4_TARGET_TEXTURE1DARRAY:
    725 					targets[idx].first = TGSI_TEXTURE_1D_ARRAY;
    726 					targets[idx].second = TGSI_TEXTURE_SHADOW1D_ARRAY;
    727 					break;
    728 				case SM4_TARGET_TEXTURE2D:
    729 					targets[idx].first = TGSI_TEXTURE_2D;
    730 					targets[idx].second = TGSI_TEXTURE_SHADOW2D;
    731 					break;
    732 				case SM4_TARGET_TEXTURE2DARRAY:
    733 					targets[idx].first = TGSI_TEXTURE_2D_ARRAY;
    734 					targets[idx].second = TGSI_TEXTURE_SHADOW2D_ARRAY;
    735 					break;
    736 				case SM4_TARGET_TEXTURE3D:
    737 					targets[idx].first = TGSI_TEXTURE_3D;
    738 					targets[idx].second = 0;
    739 					break;
    740 				case SM4_TARGET_TEXTURECUBE:
    741 					targets[idx].first = TGSI_TEXTURE_CUBE;
    742 					targets[idx].second = 0;
    743 					break;
    744 				default:
    745 					// HACK to make SimpleSample10 work
    746 					//check(0);
    747 					targets[idx].first = TGSI_TEXTURE_2D;
    748 					targets[idx].second = TGSI_TEXTURE_SHADOW2D;
    749 					break;
    750 				}
    751 				if(resources.size() <= (unsigned)idx)
    752 					resources.resize(idx + 1);
    753 				resources[idx] = ureg_DECL_sampler_view(
    754                                    ureg, idx, targets[idx].first,
    755                                    res_return_type(dcl.rrt.x),
    756                                    res_return_type(dcl.rrt.y),
    757                                    res_return_type(dcl.rrt.z),
    758                                    res_return_type(dcl.rrt.w));
    759 				break;
    760 			case SM4_OPCODE_DCL_SAMPLER:
    761 				check(idx >= 0);
    762 				if(sampler_modes.size() <= (unsigned)idx)
    763 					sampler_modes.resize(idx + 1);
    764 				check(!dcl.dcl_sampler.mono);
    765 				sampler_modes[idx] = dcl.dcl_sampler.shadow;
    766 				if(samplers.size() <= (unsigned)idx)
    767 					samplers.resize(idx + 1);
    768 				samplers[idx] = ureg_DECL_sampler(ureg, idx);
    769 				break;
    770 			case SM4_OPCODE_DCL_CONSTANT_BUFFER:
    771 				check(dcl.op->num_indices == 2);
    772 				check(dcl.op->is_index_simple(0));
    773 				check(dcl.op->is_index_simple(1));
    774 				idx = dcl.op->indices[0].disp;
    775 				ureg_DECL_constant2D(ureg, 0, (unsigned)dcl.op->indices[1].disp - 1, idx);
    776 				break;
    777 			case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE:
    778 				ureg_property_gs_input_prim(ureg, d3d_to_pipe_prim_type[dcl.dcl_gs_input_primitive.primitive]);
    779 				break;
    780 			case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
    781 				ureg_property_gs_output_prim(ureg, d3d_to_pipe_prim[dcl.dcl_gs_output_primitive_topology.primitive_topology]);
    782 				break;
    783 			case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
    784 				ureg_property_gs_max_vertices(ureg, dcl.num);
    785 				break;
    786 			default:
    787 				check(0);
    788 			}
    789 		}
    790 
    791 		translate_insns(0, program.insns.size());
    792 		sm4_to_tgsi_insn_num.push_back(ureg_get_instruction_number(ureg));
    793 		if(in_sub)
    794 			ureg_ENDSUB(ureg);
    795 		else
    796 			ureg_END(ureg);
    797 
    798 		for(unsigned i = 0; i < label_to_sm4_insn_num.size(); ++i)
    799 			ureg_fixup_label(ureg, label_to_sm4_insn_num[i].first, sm4_to_tgsi_insn_num[label_to_sm4_insn_num[i].second]);
    800 
    801 		const struct tgsi_token * tokens = ureg_get_tokens(ureg, 0);
    802 		ureg_destroy(ureg);
    803 		return (void*)tokens;
    804 	}
    805 
    806 	void* translate()
    807 	{
    808 		try
    809 		{
    810 			return do_translate();
    811 		}
    812 		catch(const char*)
    813 		{
    814 			return 0;
    815 		}
    816 	}
    817 };
    818 
    819 void* sm4_to_tgsi(struct sm4_program& program)
    820 {
    821 	sm4_to_tgsi_converter conv(program);
    822 	return conv.translate();
    823 }
    824 
    825 void* sm4_to_tgsi_linkage_only(struct sm4_program& prog)
    826 {
    827 	struct ureg_program* ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY);
    828 
    829 	uint64_t already = 0;
    830 	for(unsigned n = 0, i = 0; i < prog.num_params_out; ++i)
    831 	{
    832 		unsigned sn, si;
    833 
    834 		if(already & (1ULL << prog.params_out[i].Register))
    835 			continue;
    836 		already |= 1ULL << prog.params_out[i].Register;
    837 
    838 		switch(prog.params_out[i].SystemValueType)
    839 		{
    840 		case D3D_NAME_UNDEFINED:
    841 			sn = TGSI_SEMANTIC_GENERIC;
    842 			si = n++;
    843 			break;
    844 		case D3D_NAME_CULL_DISTANCE:
    845 		case D3D_NAME_CLIP_DISTANCE:
    846 			// FIXME
    847 			sn = 0;
    848 			si = prog.params_out[i].SemanticIndex;
    849 			assert(0);
    850 			break;
    851 		default:
    852 			continue;
    853 		}
    854 
    855 		ureg_DECL_output(ureg, sn, si);
    856 	}
    857 
    858 	const struct tgsi_token* tokens = ureg_get_tokens(ureg, 0);
    859 	ureg_destroy(ureg);
    860 	return (void*)tokens;
    861 }
    862