      1 /*
      2  * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Vadim Girlin
     25  */
     26 
     27 #include <cmath>
     28 
     29 #include "sb_shader.h"
     30 
     31 namespace r600_sb {
     32 
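// If 'em' (presumably an exec mask value) is defined by a PRED_SETxx
// instruction, clone that predset, convert the clone into the matching
// SETxx (see convert_predset_to_set below), insert it after the original
// and return its fresh temp destination; returns NULL otherwise.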
     33 value* get_select_value_for_em(shader& sh, value* em) {
     34 	if (!em->def)
     35 		return NULL;
     36 
     37 	node *predset = em->def;
     38 	if (!predset->is_pred_set())
     39 		return NULL;
     40 
     41 	alu_node *s = sh.clone(static_cast<alu_node*>(predset));
     42 	convert_predset_to_set(sh, s);
     43 
     44 	predset->insert_after(s);
     45 
     46 	value* &d0 = s->dst[0];
     47 	d0 = sh.create_temp_value();
     48 	d0->def = s;
     49 	return d0;
     50 }
     51 
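// Rewrite 'n' in place as a single-source MOV of 'src' with the given
// neg/abs source modifiers; only the opcode and source list are replaced.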
     52 void convert_to_mov(alu_node &n, value *src, bool neg, bool abs) {
     53 	n.src.resize(1);
     54 	n.src[0] = src;
     55 	n.bc.src[0].abs = abs;
     56 	n.bc.src[0].neg = neg;
     57 	n.bc.set_op(ALU_OP1_MOV);
     58 }
     59 
     60 expr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {}
     61 
     62 value * expr_handler::get_const(const literal &l) {
     63 	value *v = sh.get_const_value(l);
     64 	if (!v->gvn_source)
     65 		vt.add_value(v);
     66 	return v;
     67 }
     68 
     69 void expr_handler::assign_source(value *dst, value *src) {
     70 	dst->gvn_source = src->gvn_source;
     71 }
     72 
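// Value equivalence test used for folding/GVN: values are equal if they
// already share a gvalue, if their defining instructions are structurally
// identical (defs_equal), or if they are matching indexed accesses
// (ivars_equal); LDS accesses are never merged.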
     73 bool expr_handler::equal(value *l, value *r) {
     74 
     75 	assert(l != r);
     76 
     77 	if (l->is_lds_access() || r->is_lds_access())
     78 		return false;
     79 	if (l->gvalue() == r->gvalue())
     80 		return true;
     81 
     82 	if (l->def && r->def)
     83 		return defs_equal(l, r);
     84 
     85 	if (l->is_rel() && r->is_rel())
     86 		return ivars_equal(l, r);
     87 
     88 	return false;
     89 }
     90 
     91 bool expr_handler::ivars_equal(value* l, value* r) {
     92 	if (l->rel->gvalue() == r->rel->gvalue()
     93 			&& l->select == r->select) {
     94 
     95 		vvec &lv = l->mdef.empty() ? l->muse : l->mdef;
     96 		vvec &rv = r->mdef.empty() ? r->muse : r->mdef;
     97 
     98 		// FIXME: replace this with more precise aliasing test
     99 		return lv == rv;
    100 	}
    101 	return false;
    102 }
    103 
    104 bool expr_handler::defs_equal(value* l, value* r) {
    105 
    106 	node *d1 = l->def;
    107 	node *d2 = r->def;
    108 
    109 	if (d1->type != d2->type || d1->subtype != d2->subtype)
    110 		return false;
    111 
    112 	if (d1->is_pred_set() || d2->is_pred_set())
    113 		return false;
    114 
    115 	if (d1->type == NT_OP) {
    116 		switch (d1->subtype) {
    117 		case NST_ALU_INST:
    118 			return ops_equal(
    119 					static_cast<alu_node*>(d1),
    120 					static_cast<alu_node*>(d2));
    121 //		case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1),
     122 //			static_cast<fetch_node*>(d2));
    123 //		case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1),
     124 //			static_cast<cf_node*>(d2));
    125 		default:
    126 			break;
    127 		}
    128 	}
    129 	return false;
    130 }
    131 
    132 bool expr_handler::try_fold(value* v) {
    133 	assert(!v->gvn_source);
    134 
    135 	if (v->def)
    136 		try_fold(v->def);
    137 
    138 	if (v->gvn_source)
    139 		return true;
    140 
    141 	return false;
    142 }
    143 
    144 bool expr_handler::try_fold(node* n) {
    145 	return n->fold_dispatch(this);
    146 }
    147 
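// Fold PHI/PSI copies: a PHI whose sources are all equal is reduced to a
// single source assignment; PSI sources come in groups of three with the
// selected value in the third slot, so every third source is compared.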
    148 bool expr_handler::fold(node& n) {
    149 	if (n.subtype == NST_PHI) {
    150 
    151 		value *s = n.src[0];
    152 
    153 		// FIXME disabling phi folding for registers for now, otherwise we lose
    154 		// control flow information in some cases
    155 		// (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test)
     156 		// a control flow transformation is probably required to enable it
    157 		if (s->is_sgpr())
    158 			return false;
    159 
    160 		for(vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) {
    161 			value *v = *I;
    162 			if (!s->v_equal(v))
    163 				return false;
    164 		}
    165 
    166 		assign_source(n.dst[0], s);
    167 	} else {
    168 		assert(n.subtype == NST_PSI);
    169 		assert(n.src.size() >= 6);
    170 
    171 		value *s = n.src[2];
    172 		assert(s->gvn_source);
    173 
    174 		for(vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) {
    175 			value *v = *(I+2);
    176 			if (!s->v_equal(v))
    177 				return false;
    178 		}
    179 		assign_source(n.dst[0], s);
    180 	}
    181 	return true;
    182 }
    183 
    184 bool expr_handler::fold(container_node& n) {
    185 	return false;
    186 }
    187 
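// Try to fold a SETcc-style comparison: evaluate it completely when both
// sources are constant, or decide it from sign information when one source
// is constant and the other carries abs/neg modifiers, then convert the
// node into a MOV of the canonical true/false literal for its dst type.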
    188 bool expr_handler::fold_setcc(alu_node &n) {
    189 
    190 	value* v0 = n.src[0]->gvalue();
    191 	value* v1 = n.src[1]->gvalue();
    192 
    193 	assert(v0 && v1 && n.dst[0]);
    194 
    195 	unsigned flags = n.bc.op_ptr->flags;
    196 	unsigned cc = flags & AF_CC_MASK;
    197 	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
    198 	unsigned dst_type = flags & AF_DST_TYPE_MASK;
    199 
    200 	bool cond_result;
    201 	bool have_result = false;
    202 
    203 	bool isc0 = v0->is_const();
    204 	bool isc1 = v1->is_const();
    205 
    206 	literal dv, cv0, cv1;
    207 
    208 	if (isc0) {
    209 		cv0 = v0->get_const_value();
    210 		apply_alu_src_mod(n.bc, 0, cv0);
    211 	}
    212 
    213 	if (isc1) {
    214 		cv1 = v1->get_const_value();
    215 		apply_alu_src_mod(n.bc, 1, cv1);
    216 	}
    217 
    218 	if (isc0 && isc1) {
    219 		cond_result = evaluate_condition(flags, cv0, cv1);
    220 		have_result = true;
    221 	} else if (isc1) {
    222 		if (cmp_type == AF_FLOAT_CMP) {
    223 			if (n.bc.src[0].abs && !n.bc.src[0].neg) {
    224 				if (cv1.f < 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) {
    225 					cond_result = true;
    226 					have_result = true;
    227 				} else if (cv1.f <= 0.0f && cc == AF_CC_GE) {
    228 					cond_result = true;
    229 					have_result = true;
    230 				}
    231 			} else if (n.bc.src[0].abs && n.bc.src[0].neg) {
    232 				if (cv1.f > 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) {
    233 					cond_result = false;
    234 					have_result = true;
    235 				} else if (cv1.f >= 0.0f && cc == AF_CC_GT) {
    236 					cond_result = false;
    237 					have_result = true;
    238 				}
    239 			}
    240 		} else if (cmp_type == AF_UINT_CMP && cv1.u == 0 && cc == AF_CC_GE) {
    241 			cond_result = true;
    242 			have_result = true;
    243 		}
    244 	} else if (isc0) {
    245 		if (cmp_type == AF_FLOAT_CMP) {
    246 			if (n.bc.src[1].abs && !n.bc.src[1].neg) {
    247 				if (cv0.f <= 0.0f && cc == AF_CC_GT) {
    248 					cond_result = false;
    249 					have_result = true;
    250 				} else if (cv0.f < 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) {
    251 					cond_result = false;
    252 					have_result = true;
    253 				}
    254 			} else if (n.bc.src[1].abs && n.bc.src[1].neg) {
    255 				if (cv0.f >= 0.0f && cc == AF_CC_GE) {
    256 					cond_result = true;
    257 					have_result = true;
    258 				} else if (cv0.f > 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) {
    259 					cond_result = true;
    260 					have_result = true;
    261 				}
    262 			}
    263 		} else if (cmp_type == AF_UINT_CMP && cv0.u == 0 && cc == AF_CC_GT) {
    264 			cond_result = false;
    265 			have_result = true;
    266 		}
    267 	} else if (v0 == v1) {
    268 		bc_alu_src &s0 = n.bc.src[0], &s1 = n.bc.src[1];
    269 		if (s0.abs == s1.abs && s0.neg == s1.neg && cmp_type != AF_FLOAT_CMP) {
    270 			// NOTE can't handle float comparisons here because of NaNs
    271 			cond_result = (cc == AF_CC_E || cc == AF_CC_GE);
    272 			have_result = true;
    273 		}
    274 	}
    275 
    276 	if (have_result) {
    277 		literal result;
    278 
    279 		if (cond_result)
    280 			result = dst_type != AF_FLOAT_DST ?
    281 					literal(0xFFFFFFFFu) : literal(1.0f);
    282 		else
    283 			result = literal(0);
    284 
    285 		convert_to_mov(n, sh.get_const_value(result));
    286 		return fold_alu_op1(n);
    287 	}
    288 
    289 	return false;
    290 }
    291 
    292 bool expr_handler::fold(alu_node& n) {
    293 
    294 	switch (n.bc.op_ptr->src_count) {
    295 	case 1: return fold_alu_op1(n);
    296 	case 2: return fold_alu_op2(n);
    297 	case 3: return fold_alu_op3(n);
    298 	default:
    299 		assert(0);
    300 	}
    301 	return false;
    302 }
    303 
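// Fetch results with dst_sel forced to SEL_0/SEL_1 are known constants, so
// assign those channels the corresponding const value; the fetch itself is
// never folded away here (always returns false).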
    304 bool expr_handler::fold(fetch_node& n) {
    305 
    306 	unsigned chan = 0;
    307 	for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) {
    308 		value* &v = *I;
    309 		if (v) {
    310 			if (n.bc.dst_sel[chan] == SEL_0)
    311 				assign_source(*I, get_const(0.0f));
    312 			else if (n.bc.dst_sel[chan] == SEL_1)
    313 				assign_source(*I, get_const(1.0f));
    314 		}
    315 		++chan;
    316 	}
    317 	return false;
    318 }
    319 
    320 bool expr_handler::fold(cf_node& n) {
    321 	return false;
    322 }
    323 
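// Apply the abs/neg modifiers of source slot 'src' to literal 'v' (both act
// on the float view of the literal); apply_alu_dst_mod below applies omod
// scaling and clamping in the same way.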
    324 void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src,
    325                                      literal &v) {
    326 	const bc_alu_src &s = bc.src[src];
    327 
    328 	if (s.abs)
    329 		v = fabs(v.f);
    330 	if (s.neg)
    331 		v = -v.f;
    332 }
    333 
    334 void expr_handler::apply_alu_dst_mod(const bc_alu &bc, literal &v) {
     335 	float omod_coeff[] = {2.0f, 4.0f, 0.5f};
    336 
    337 	if (bc.omod)
    338 		v = v.f * omod_coeff[bc.omod - 1];
    339 	if (bc.clamp)
    340 		v = float_clamp(v.f);
    341 }
    342 
    343 bool expr_handler::args_equal(const vvec &l, const vvec &r) {
    344 
    345 	assert(l.size() == r.size());
    346 
    347 	int s = l.size();
    348 
    349 	for (int k = 0; k < s; ++k) {
    350 		if (!l[k]->v_equal(r[k]))
    351 			return false;
    352 	}
    353 
    354 	return true;
    355 }
    356 
    357 bool expr_handler::ops_equal(const alu_node *l, const alu_node* r) {
    358 	const bc_alu &b0 = l->bc;
    359 	const bc_alu &b1 = r->bc;
    360 
    361 	if (b0.op != b1.op)
    362 		return false;
    363 
    364 	unsigned src_count = b0.op_ptr->src_count;
    365 
    366 	if (b0.index_mode != b1.index_mode)
    367 		return false;
    368 
    369 	if (b0.clamp != b1.clamp || b0.omod != b1.omod)
    370 			return false;
    371 
    372 	for (unsigned s = 0; s < src_count; ++s) {
    373 		const bc_alu_src &s0 = b0.src[s];
    374 		const bc_alu_src &s1 = b1.src[s];
    375 
    376 		if (s0.abs != s1.abs || s0.neg != s1.neg)
    377 			return false;
    378 	}
    379 	return args_equal(l->src, r->src);
    380 }
    381 
    382 bool expr_handler::fold_alu_op1(alu_node& n) {
    383 
    384 	assert(!n.src.empty());
    385 	if (n.src.empty())
    386 		return false;
    387 
    388 	/* don't fold LDS instructions */
    389 	if (n.bc.op_ptr->flags & AF_LDS)
    390 		return false;
    391 
    392 	value* v0 = n.src[0]->gvalue();
    393 
    394 	if (v0->is_lds_oq() || v0->is_lds_access())
    395 		return false;
    396 	assert(v0 && n.dst[0]);
    397 
    398 	if (!v0->is_const()) {
    399 		// handle (MOV -(MOV -x)) => (MOV x)
     400 		if (n.bc.op == ALU_OP1_MOV && n.bc.src[0].neg && !n.bc.src[0].abs
    401 				&& v0->def && v0->def->is_alu_op(ALU_OP1_MOV)) {
    402 			alu_node *sd = static_cast<alu_node*>(v0->def);
    403 			if (!sd->bc.clamp && !sd->bc.omod && !sd->bc.src[0].abs &&
    404 					sd->bc.src[0].neg) {
    405 				n.src[0] = sd->src[0];
    406 				n.bc.src[0].neg = 0;
    407 				v0 = n.src[0]->gvalue();
    408 			}
    409 		}
    410 
    411 		if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT ||
    412 				n.bc.op == ALU_OP1_MOVA_GPR_INT)
    413 				&& n.bc.clamp == 0 && n.bc.omod == 0
    414 				&& n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0 &&
    415 				n.src.size() == 1 /* RIM/SIM can be appended as additional values */) {
    416 			assign_source(n.dst[0], v0);
    417 			return true;
    418 		}
    419 		return false;
    420 	}
    421 
    422 	literal dv, cv = v0->get_const_value();
    423 	apply_alu_src_mod(n.bc, 0, cv);
    424 
    425 	switch (n.bc.op) {
    426 	case ALU_OP1_CEIL: dv = ceil(cv.f); break;
    427 	case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break;
    428 	case ALU_OP1_EXP_IEEE: dv = exp2(cv.f); break;
    429 	case ALU_OP1_FLOOR: dv = floor(cv.f); break;
    430 	case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ????
    431 	case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floor(cv.f); break;
    432 	case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floor(cv.f + 0.5f); break;
    433 	case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)trunc(cv.f); break;
    434 	case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break;
    435 	case ALU_OP1_FRACT: dv = cv.f - floor(cv.f); break;
    436 	case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break;
    437 	case ALU_OP1_LOG_CLAMPED:
    438 	case ALU_OP1_LOG_IEEE:
    439 		if (cv.f != 0.0f)
    440 			dv = log2(cv.f);
    441 		else
    442 			// don't fold to NAN, let the GPU handle it for now
    443 			// (prevents degenerate LIT tests from failing)
    444 			return false;
    445 		break;
    446 	case ALU_OP1_MOV: dv = cv; break;
    447 	case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ???
    448 //	case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break;
    449 //	case ALU_OP1_MOVA_GPR_INT:
    450 	case ALU_OP1_NOT_INT: dv = ~cv.i; break;
    451 	case ALU_OP1_PRED_SET_INV:
    452 		dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break;
    453 	case ALU_OP1_PRED_SET_RESTORE: dv = cv; break;
    454 	case ALU_OP1_RECIPSQRT_CLAMPED:
    455 	case ALU_OP1_RECIPSQRT_FF:
    456 	case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrt(cv.f); break;
    457 	case ALU_OP1_RECIP_CLAMPED:
    458 	case ALU_OP1_RECIP_FF:
    459 	case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break;
    460 //	case ALU_OP1_RECIP_INT:
     461 	case ALU_OP1_RECIP_UINT: if (!cv.u) return false; dv.u = (1ull << 32) / cv.u; break;
    462 //	case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
    463 	case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break;
    464 	case ALU_OP1_SQRT_IEEE: dv = sqrt(cv.f); break;
    465 	case ALU_OP1_TRUNC: dv = trunc(cv.f); break;
    466 
    467 	default:
    468 		return false;
    469 	}
    470 
    471 	apply_alu_dst_mod(n.bc, dv);
    472 	assign_source(n.dst[0], get_const(dv));
    473 	return true;
    474 }
    475 
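// Turn an ADD with a MUL/MUL_IEEE operand into MULADD/MULADD_IEEE, e.g.
// (ADD (MUL a, b), c) => (MULADD a, b, c), provided no abs/omod/clamp
// modifiers are involved and not all three resulting sources are kcache
// constants (presumably a hardware operand limit); any neg on the consumed
// ADD operand is folded into the multiply sources.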
    476 bool expr_handler::fold_mul_add(alu_node *n) {
    477 
    478 	bool ieee;
    479 	value* v0 = n->src[0]->gvalue();
    480 
    481 	alu_node *d0 = (v0->def && v0->def->is_alu_inst()) ?
    482 			static_cast<alu_node*>(v0->def) : NULL;
    483 
    484 	if (d0) {
    485 		if (d0->is_alu_op(ALU_OP2_MUL_IEEE))
    486 			ieee = true;
    487 		else if (d0->is_alu_op(ALU_OP2_MUL))
    488 			ieee = false;
    489 		else
    490 			return false;
    491 
    492 		if (!d0->bc.src[0].abs && !d0->bc.src[1].abs &&
    493 				!n->bc.src[1].abs && !n->bc.src[0].abs && !d0->bc.omod &&
    494 				!d0->bc.clamp && !n->bc.omod &&
    495 				(!d0->src[0]->is_kcache() || !d0->src[1]->is_kcache() ||
    496 						!n->src[1]->is_kcache())) {
    497 
    498 			bool mul_neg = n->bc.src[0].neg;
    499 
    500 			n->src.resize(3);
    501 			n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
    502 			n->src[2] = n->src[1];
    503 			n->bc.src[2] = n->bc.src[1];
    504 			n->src[0] = d0->src[0];
    505 			n->bc.src[0] = d0->bc.src[0];
    506 			n->src[1] = d0->src[1];
    507 			n->bc.src[1] = d0->bc.src[1];
    508 
    509 			n->bc.src[0].neg ^= mul_neg;
    510 
    511 			fold_alu_op3(*n);
    512 			return true;
    513 		}
    514 	}
    515 
    516 	value* v1 = n->src[1]->gvalue();
    517 
    518 	alu_node *d1 = (v1->def && v1->def->is_alu_inst()) ?
    519 			static_cast<alu_node*>(v1->def) : NULL;
    520 
    521 	if (d1) {
    522 		if (d1->is_alu_op(ALU_OP2_MUL_IEEE))
    523 			ieee = true;
    524 		else if (d1->is_alu_op(ALU_OP2_MUL))
    525 			ieee = false;
    526 		else
    527 			return false;
    528 
    529 		if (!d1->bc.src[1].abs && !d1->bc.src[0].abs &&
    530 				!n->bc.src[0].abs && !n->bc.src[1].abs && !d1->bc.omod &&
    531 				!d1->bc.clamp && !n->bc.omod &&
    532 				(!d1->src[0]->is_kcache() || !d1->src[1]->is_kcache() ||
    533 						!n->src[0]->is_kcache())) {
    534 
    535 			bool mul_neg = n->bc.src[1].neg;
    536 
    537 			n->src.resize(3);
    538 			n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
    539 			n->src[2] = n->src[0];
    540 			n->bc.src[2] = n->bc.src[0];
    541 			n->src[1] = d1->src[1];
    542 			n->bc.src[1] = d1->bc.src[1];
    543 			n->src[0] = d1->src[0];
    544 			n->bc.src[0] = d1->bc.src[0];
    545 
    546 			n->bc.src[1].neg ^= mul_neg;
    547 
    548 			fold_alu_op3(*n);
    549 			return true;
    550 		}
    551 	}
    552 
    553 	return false;
    554 }
    555 
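// Evaluate a two-source ALU op on constant arguments; returns false for ops
// it cannot evaluate. fold_assoc also calls this with dummy zero arguments
// just to probe whether an op is supported here.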
    556 bool expr_handler::eval_const_op(unsigned op, literal &r,
    557                                  literal cv0, literal cv1) {
    558 
    559 	switch (op) {
    560 	case ALU_OP2_ADD: r = cv0.f + cv1.f; break;
    561 	case ALU_OP2_ADDC_UINT:
    562 		r = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break;
    563 	case ALU_OP2_ADD_INT: r = cv0.i + cv1.i; break;
    564 	case ALU_OP2_AND_INT: r = cv0.i & cv1.i; break;
    565 	case ALU_OP2_ASHR_INT: r = cv0.i >> (cv1.i & 0x1F); break;
    566 	case ALU_OP2_BFM_INT:
    567 		r = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break;
    568 	case ALU_OP2_LSHL_INT: r = cv0.i << cv1.i; break;
    569 	case ALU_OP2_LSHR_INT: r = cv0.u >> cv1.u; break;
    570 	case ALU_OP2_MAX:
    571 	case ALU_OP2_MAX_DX10: r = cv0.f > cv1.f ? cv0.f : cv1.f; break;
    572 	case ALU_OP2_MAX_INT: r = cv0.i > cv1.i ? cv0.i : cv1.i; break;
    573 	case ALU_OP2_MAX_UINT: r = cv0.u > cv1.u ? cv0.u : cv1.u; break;
    574 	case ALU_OP2_MIN:
    575 	case ALU_OP2_MIN_DX10: r = cv0.f < cv1.f ? cv0.f : cv1.f; break;
    576 	case ALU_OP2_MIN_INT: r = cv0.i < cv1.i ? cv0.i : cv1.i; break;
    577 	case ALU_OP2_MIN_UINT: r = cv0.u < cv1.u ? cv0.u : cv1.u; break;
    578 	case ALU_OP2_MUL:
    579 	case ALU_OP2_MUL_IEEE: r = cv0.f * cv1.f; break;
    580 	case ALU_OP2_MULHI_INT:
    581 		r = (int32_t)(((int64_t)cv0.u * cv1.u)>>32); break;
    582 	case ALU_OP2_MULHI_UINT:
    583 		r = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break;
    584 	case ALU_OP2_MULLO_INT:
    585 		r = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
    586 	case ALU_OP2_MULLO_UINT:
    587 		r = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
    588 	case ALU_OP2_OR_INT: r = cv0.i | cv1.i; break;
    589 	case ALU_OP2_SUB_INT: r = cv0.i - cv1.i; break;
    590 	case ALU_OP2_XOR_INT: r = cv0.i ^ cv1.i; break;
    591 
    592 	default:
    593 		return false;
    594 	}
    595 
    596 	return true;
    597 }
    598 
    599 // fold the chain of associative ops, e.g. (ADD 2, (ADD x, 3)) => (ADD x, 5)
    600 bool expr_handler::fold_assoc(alu_node *n) {
    601 
    602 	alu_node *a = n;
    603 	literal cr;
    604 
    605 	int last_arg = -3;
    606 
    607 	unsigned op = n->bc.op;
    608 	bool allow_neg = false, cur_neg = false;
    609 	bool distribute_neg = false;
    610 
    611 	switch(op) {
    612 	case ALU_OP2_ADD:
    613 		distribute_neg = true;
    614 		allow_neg = true;
    615 		break;
    616 	case ALU_OP2_MUL:
    617 	case ALU_OP2_MUL_IEEE:
    618 		allow_neg = true;
    619 		break;
    620 	case ALU_OP3_MULADD:
    621 		allow_neg = true;
    622 		op = ALU_OP2_MUL;
    623 		break;
    624 	case ALU_OP3_MULADD_IEEE:
    625 		allow_neg = true;
    626 		op = ALU_OP2_MUL_IEEE;
    627 		break;
    628 	default:
    629 		if (n->bc.op_ptr->src_count != 2)
    630 			return false;
    631 	}
    632 
    633 	// check if we can evaluate the op
    634 	if (!eval_const_op(op, cr, literal(0), literal(0)))
    635 		return false;
    636 
    637 	while (true) {
    638 
    639 		value *v0 = a->src[0]->gvalue();
    640 		value *v1 = a->src[1]->gvalue();
    641 
    642 		last_arg = -2;
    643 
    644 		if (v1->is_const()) {
    645 			literal arg = v1->get_const_value();
    646 			apply_alu_src_mod(a->bc, 1, arg);
    647 			if (cur_neg && distribute_neg)
    648 				arg.f = -arg.f;
    649 
    650 			if (a == n)
    651 				cr = arg;
    652 			else
    653 				eval_const_op(op, cr, cr, arg);
    654 
    655 			if (v0->def) {
    656 				alu_node *d0 = static_cast<alu_node*>(v0->def);
    657 				if ((d0->is_alu_op(op) ||
    658 						(op == ALU_OP2_MUL_IEEE &&
    659 								d0->is_alu_op(ALU_OP2_MUL))) &&
    660 						!d0->bc.omod && !d0->bc.clamp &&
    661 						!a->bc.src[0].abs &&
    662 						(!a->bc.src[0].neg || allow_neg)) {
    663 					cur_neg ^= a->bc.src[0].neg;
    664 					a = d0;
    665 					continue;
    666 				}
    667 			}
    668 			last_arg = 0;
    669 
    670 		}
    671 
    672 		if (v0->is_const()) {
    673 			literal arg = v0->get_const_value();
    674 			apply_alu_src_mod(a->bc, 0, arg);
    675 			if (cur_neg && distribute_neg)
    676 				arg.f = -arg.f;
    677 
    678 			if (last_arg == 0) {
    679 				eval_const_op(op, cr, cr, arg);
    680 				last_arg = -1;
    681 				break;
    682 			}
    683 
    684 			if (a == n)
    685 				cr = arg;
    686 			else
    687 				eval_const_op(op, cr, cr, arg);
    688 
    689 			if (v1->def) {
    690 				alu_node *d1 = static_cast<alu_node*>(v1->def);
    691 				if ((d1->is_alu_op(op) ||
    692 						(op == ALU_OP2_MUL_IEEE &&
    693 								d1->is_alu_op(ALU_OP2_MUL))) &&
    694 						!d1->bc.omod && !d1->bc.clamp &&
    695 						!a->bc.src[1].abs &&
    696 						(!a->bc.src[1].neg || allow_neg)) {
    697 					cur_neg ^= a->bc.src[1].neg;
    698 					a = d1;
    699 					continue;
    700 				}
    701 			}
    702 
    703 			last_arg = 1;
    704 		}
    705 
    706 		break;
     707 	}
    708 
    709 	if (last_arg == -1) {
    710 		// result is const
    711 		apply_alu_dst_mod(n->bc, cr);
    712 
    713 		if (n->bc.op == op) {
    714 			convert_to_mov(*n, sh.get_const_value(cr));
    715 			fold_alu_op1(*n);
    716 			return true;
    717 		} else { // MULADD => ADD
    718 			n->src[0] = n->src[2];
    719 			n->bc.src[0] = n->bc.src[2];
    720 			n->src[1] = sh.get_const_value(cr);
    721 			memset(&n->bc.src[1], 0, sizeof(bc_alu_src));
    722 
    723 			n->src.resize(2);
    724 			n->bc.set_op(ALU_OP2_ADD);
    725 		}
    726 	} else if (last_arg >= 0) {
    727 		n->src[0] = a->src[last_arg];
    728 		n->bc.src[0] = a->bc.src[last_arg];
    729 		n->bc.src[0].neg ^= cur_neg;
    730 		n->src[1] = sh.get_const_value(cr);
    731 		memset(&n->bc.src[1], 0, sizeof(bc_alu_src));
    732 	}
    733 
    734 	return false;
    735 }
    736 
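// Fold a two-source ALU op: SETcc ops go to fold_setcc, associative chains
// to fold_assoc, ADD may become MULADD via fold_mul_add; identities with
// equal sources (MIN/MAX(x,x), x+x, x-x) and with 0/1 constants are
// simplified, and the op is fully evaluated when both sources are constant.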
    737 bool expr_handler::fold_alu_op2(alu_node& n) {
    738 
    739 	if (n.src.size() < 2)
    740 		return false;
    741 
    742 	unsigned flags = n.bc.op_ptr->flags;
    743 
    744 	if (flags & AF_SET) {
    745 		return fold_setcc(n);
    746 	}
    747 
    748 	if (!sh.safe_math && (flags & AF_M_ASSOC)) {
    749 		if (fold_assoc(&n))
    750 			return true;
    751 	}
    752 
    753 	value* v0 = n.src[0]->gvalue();
    754 	value* v1 = n.src[1]->gvalue();
    755 
    756 	assert(v0 && v1);
    757 
    758 	// handle some operations with equal args, e.g. x + x => x * 2
    759 	if (v0 == v1) {
    760 		if (n.bc.src[0].neg == n.bc.src[1].neg &&
    761 				n.bc.src[0].abs == n.bc.src[1].abs) {
    762 			switch (n.bc.op) {
    763 			case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
    764 			case ALU_OP2_MIN_DX10:
    765 			case ALU_OP2_MAX:
    766 			case ALU_OP2_MAX_DX10:
    767 				convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs);
    768 				return fold_alu_op1(n);
    769 			case ALU_OP2_ADD:  // (ADD x, x) => (MUL x, 2)
    770 				if (!sh.safe_math) {
    771 					n.src[1] = sh.get_const_value(2.0f);
    772 					memset(&n.bc.src[1], 0, sizeof(bc_alu_src));
    773 					n.bc.set_op(ALU_OP2_MUL);
    774 					return fold_alu_op2(n);
    775 				}
    776 				break;
    777 			}
    778 		}
    779 		if (n.bc.src[0].neg != n.bc.src[1].neg &&
    780 				n.bc.src[0].abs == n.bc.src[1].abs) {
    781 			switch (n.bc.op) {
    782 			case ALU_OP2_ADD:  // (ADD x, -x) => (MOV 0)
    783 				if (!sh.safe_math) {
    784 					convert_to_mov(n, sh.get_const_value(literal(0)));
    785 					return fold_alu_op1(n);
    786 				}
    787 				break;
    788 			}
    789 		}
    790 	}
    791 
    792 	if (n.bc.op == ALU_OP2_ADD) {
    793 		if (fold_mul_add(&n))
    794 			return true;
    795 	}
    796 
    797 	bool isc0 = v0->is_const();
    798 	bool isc1 = v1->is_const();
    799 
    800 	if (!isc0 && !isc1)
    801 		return false;
    802 
    803 	literal dv, cv0, cv1;
    804 
    805 	if (isc0) {
    806 		cv0 = v0->get_const_value();
    807 		apply_alu_src_mod(n.bc, 0, cv0);
    808 	}
    809 
    810 	if (isc1) {
    811 		cv1 = v1->get_const_value();
    812 		apply_alu_src_mod(n.bc, 1, cv1);
    813 	}
    814 
    815 	if (isc0 && isc1) {
    816 
    817 		if (!eval_const_op(n.bc.op, dv, cv0, cv1))
    818 			return false;
    819 
    820 	} else { // one source is const
    821 
    822 		if (isc0 && cv0 == literal(0)) {
    823 			switch (n.bc.op) {
    824 			case ALU_OP2_ADD:
    825 			case ALU_OP2_ADD_INT:
    826 			case ALU_OP2_MAX_UINT:
    827 			case ALU_OP2_OR_INT:
    828 			case ALU_OP2_XOR_INT:
    829 				convert_to_mov(n, n.src[1], n.bc.src[1].neg,  n.bc.src[1].abs);
    830 				return fold_alu_op1(n);
    831 			case ALU_OP2_AND_INT:
    832 			case ALU_OP2_ASHR_INT:
    833 			case ALU_OP2_LSHL_INT:
    834 			case ALU_OP2_LSHR_INT:
    835 			case ALU_OP2_MIN_UINT:
    836 			case ALU_OP2_MUL:
    837 			case ALU_OP2_MULHI_UINT:
    838 			case ALU_OP2_MULLO_UINT:
    839 				convert_to_mov(n, sh.get_const_value(literal(0)));
    840 				return fold_alu_op1(n);
    841 			}
    842 		} else if (isc1 && cv1 == literal(0)) {
    843 			switch (n.bc.op) {
    844 			case ALU_OP2_ADD:
    845 			case ALU_OP2_ADD_INT:
    846 			case ALU_OP2_ASHR_INT:
    847 			case ALU_OP2_LSHL_INT:
    848 			case ALU_OP2_LSHR_INT:
    849 			case ALU_OP2_MAX_UINT:
    850 			case ALU_OP2_OR_INT:
    851 			case ALU_OP2_SUB_INT:
    852 			case ALU_OP2_XOR_INT:
    853 				convert_to_mov(n, n.src[0], n.bc.src[0].neg,  n.bc.src[0].abs);
    854 				return fold_alu_op1(n);
    855 			case ALU_OP2_AND_INT:
    856 			case ALU_OP2_MIN_UINT:
    857 			case ALU_OP2_MUL:
    858 			case ALU_OP2_MULHI_UINT:
    859 			case ALU_OP2_MULLO_UINT:
    860 				convert_to_mov(n, sh.get_const_value(literal(0)));
    861 				return fold_alu_op1(n);
    862 			}
    863 		} else if (isc0 && cv0 == literal(1.0f)) {
    864 			switch (n.bc.op) {
    865 			case ALU_OP2_MUL:
    866 			case ALU_OP2_MUL_IEEE:
    867 				convert_to_mov(n, n.src[1], n.bc.src[1].neg,  n.bc.src[1].abs);
    868 				return fold_alu_op1(n);
    869 			}
    870 		} else if (isc1 && cv1 == literal(1.0f)) {
    871 			switch (n.bc.op) {
    872 			case ALU_OP2_MUL:
    873 			case ALU_OP2_MUL_IEEE:
    874 				convert_to_mov(n, n.src[0], n.bc.src[0].neg,  n.bc.src[0].abs);
    875 				return fold_alu_op1(n);
    876 			}
    877 		}
    878 
    879 		return false;
    880 	}
    881 
    882 	apply_alu_dst_mod(n.bc, dv);
    883 	assign_source(n.dst[0], get_const(dv));
    884 	return true;
    885 }
    886 
    887 bool expr_handler::evaluate_condition(unsigned alu_cnd_flags,
    888                                       literal s1, literal s2) {
    889 
    890 	unsigned cmp_type = alu_cnd_flags & AF_CMP_TYPE_MASK;
    891 	unsigned cc = alu_cnd_flags & AF_CC_MASK;
    892 
    893 	switch (cmp_type) {
    894 	case AF_FLOAT_CMP: {
    895 		switch (cc) {
    896 		case AF_CC_E : return s1.f == s2.f;
    897 		case AF_CC_GT: return s1.f >  s2.f;
    898 		case AF_CC_GE: return s1.f >= s2.f;
    899 		case AF_CC_NE: return s1.f != s2.f;
    900 		case AF_CC_LT: return s1.f <  s2.f;
    901 		case AF_CC_LE: return s1.f <= s2.f;
    902 		default:
    903 			assert(!"invalid condition code");
    904 			return false;
    905 		}
    906 	}
    907 	case AF_INT_CMP: {
    908 		switch (cc) {
    909 		case AF_CC_E : return s1.i == s2.i;
    910 		case AF_CC_GT: return s1.i >  s2.i;
    911 		case AF_CC_GE: return s1.i >= s2.i;
    912 		case AF_CC_NE: return s1.i != s2.i;
    913 		case AF_CC_LT: return s1.i <  s2.i;
    914 		case AF_CC_LE: return s1.i <= s2.i;
    915 		default:
    916 			assert(!"invalid condition code");
    917 			return false;
    918 		}
    919 	}
    920 	case AF_UINT_CMP: {
    921 		switch (cc) {
    922 		case AF_CC_E : return s1.u == s2.u;
    923 		case AF_CC_GT: return s1.u >  s2.u;
    924 		case AF_CC_GE: return s1.u >= s2.u;
    925 		case AF_CC_NE: return s1.u != s2.u;
    926 		case AF_CC_LT: return s1.u <  s2.u;
    927 		case AF_CC_LE: return s1.u <= s2.u;
    928 		default:
    929 			assert(!"invalid condition code");
    930 			return false;
    931 		}
    932 	}
    933 	default:
    934 		assert(!"invalid cmp_type");
    935 		return false;
    936 	}
    937 }
    938 
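// Fold a three-source ALU op: conditional moves whose result doesn't depend
// on the condition (or whose condition is constant), the redistribution
// (MULADD a, x, (MUL x, b)) => (MUL x, (ADD a, b)), and MULADD with constant
// operands; other op3 cases are left alone for now (see TODO below).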
    939 bool expr_handler::fold_alu_op3(alu_node& n) {
    940 
    941 	if (n.src.size() < 3)
    942 		return false;
    943 
    944 	if (!sh.safe_math && (n.bc.op_ptr->flags & AF_M_ASSOC)) {
    945 		if (fold_assoc(&n))
    946 			return true;
    947 	}
    948 
    949 	value* v0 = n.src[0]->gvalue();
    950 	value* v1 = n.src[1]->gvalue();
    951 	value* v2 = n.src[2]->gvalue();
    952 
    953 	/* LDS instructions look like op3 with no dst - don't fold. */
    954 	if (!n.dst[0])
    955 		return false;
    956 	assert(v0 && v1 && v2 && n.dst[0]);
    957 
    958 	bool isc0 = v0->is_const();
    959 	bool isc1 = v1->is_const();
    960 	bool isc2 = v2->is_const();
    961 
    962 	literal dv, cv0, cv1, cv2;
    963 
    964 	if (isc0) {
    965 		cv0 = v0->get_const_value();
    966 		apply_alu_src_mod(n.bc, 0, cv0);
    967 	}
    968 
    969 	if (isc1) {
    970 		cv1 = v1->get_const_value();
    971 		apply_alu_src_mod(n.bc, 1, cv1);
    972 	}
    973 
    974 	if (isc2) {
    975 		cv2 = v2->get_const_value();
    976 		apply_alu_src_mod(n.bc, 2, cv2);
    977 	}
    978 
    979 	unsigned flags = n.bc.op_ptr->flags;
    980 
    981 	if (flags & AF_CMOV) {
    982 		int src = 0;
    983 
    984 		if (v1 == v2 && n.bc.src[1].neg == n.bc.src[2].neg) {
    985 			// result doesn't depend on condition, convert to MOV
    986 			src = 1;
    987 		} else if (isc0) {
    988 			// src0 is const, condition can be evaluated, convert to MOV
    989 			bool cond = evaluate_condition(n.bc.op_ptr->flags & (AF_CC_MASK |
    990 					AF_CMP_TYPE_MASK), cv0, literal(0));
    991 			src = cond ? 1 : 2;
    992 		}
    993 
    994 		if (src) {
    995 			// if src is selected, convert to MOV
    996 			convert_to_mov(n, n.src[src], n.bc.src[src].neg);
    997 			return fold_alu_op1(n);
    998 		}
    999 	}
   1000 
   1001 	// handle (MULADD a, x, MUL (x, b)) => (MUL x, ADD (a, b))
   1002 	if (!sh.safe_math && (n.bc.op == ALU_OP3_MULADD ||
   1003 			n.bc.op == ALU_OP3_MULADD_IEEE)) {
   1004 
   1005 		unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
   1006 				ALU_OP2_MUL_IEEE : ALU_OP2_MUL;
   1007 
   1008 		if (!isc2 && v2->def && v2->def->is_alu_op(op)) {
   1009 
   1010 			alu_node *md = static_cast<alu_node*>(v2->def);
   1011 			value *mv0 = md->src[0]->gvalue();
   1012 			value *mv1 = md->src[1]->gvalue();
   1013 
    1014 			int es0 = -1, es1 = -1;
   1015 
   1016 			if (v0 == mv0) {
   1017 				es0 = 0;
   1018 				es1 = 0;
   1019 			} else if (v0 == mv1) {
   1020 				es0 = 0;
   1021 				es1 = 1;
   1022 			} else if (v1 == mv0) {
   1023 				es0 = 1;
   1024 				es1 = 0;
   1025 			} else if (v1 == mv1) {
   1026 				es0 = 1;
   1027 				es1 = 1;
   1028 			}
   1029 
   1030 			if (es0 != -1) {
   1031 				value *va0 = es0 == 0 ? v1 : v0;
   1032 				value *va1 = es1 == 0 ? mv1 : mv0;
   1033 
   1034 				alu_node *add = sh.create_alu();
   1035 				add->bc.set_op(ALU_OP2_ADD);
   1036 
   1037 				add->dst.resize(1);
   1038 				add->src.resize(2);
   1039 
   1040 				value *t = sh.create_temp_value();
   1041 				t->def = add;
   1042 				add->dst[0] = t;
   1043 				add->src[0] = va0;
   1044 				add->src[1] = va1;
   1045 				add->bc.src[0] = n.bc.src[!es0];
   1046 				add->bc.src[1] = md->bc.src[!es1];
   1047 
   1048 				add->bc.src[1].neg ^= n.bc.src[2].neg ^
   1049 						(n.bc.src[es0].neg != md->bc.src[es1].neg);
   1050 
   1051 				n.insert_before(add);
   1052 				vt.add_value(t);
   1053 
   1054 				t = t->gvalue();
   1055 
   1056 				if (es0 == 1) {
   1057 					n.src[0] = n.src[1];
   1058 					n.bc.src[0] = n.bc.src[1];
   1059 				}
   1060 
   1061 				n.src[1] = t;
   1062 				memset(&n.bc.src[1], 0, sizeof(bc_alu_src));
   1063 
   1064 				n.src.resize(2);
   1065 
   1066 				n.bc.set_op(op);
   1067 				return fold_alu_op2(n);
   1068 			}
   1069 		}
   1070 	}
   1071 
   1072 	if (!isc0 && !isc1 && !isc2)
   1073 		return false;
   1074 
   1075 	if (isc0 && isc1 && isc2) {
   1076 		switch (n.bc.op) {
   1077 		case ALU_OP3_MULADD_IEEE:
   1078 		case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break;
   1079 
   1080 		// TODO
   1081 
   1082 		default:
   1083 			return false;
   1084 		}
   1085 	} else {
   1086 		if (isc0 && isc1) {
   1087 			switch (n.bc.op) {
   1088 			case ALU_OP3_MULADD:
   1089 			case ALU_OP3_MULADD_IEEE:
   1090 				dv = cv0.f * cv1.f;
   1091 				n.bc.set_op(ALU_OP2_ADD);
   1092 				n.src[0] = sh.get_const_value(dv);
   1093 				memset(&n.bc.src[0], 0, sizeof(bc_alu_src));
   1094 				n.src[1] = n.src[2];
   1095 				n.bc.src[1] = n.bc.src[2];
   1096 				n.src.resize(2);
   1097 				return fold_alu_op2(n);
   1098 			}
   1099 		}
   1100 
   1101 		if (n.bc.op == ALU_OP3_MULADD) {
   1102 			if ((isc0 && cv0 == literal(0)) || (isc1 && cv1 == literal(0))) {
   1103 				convert_to_mov(n, n.src[2], n.bc.src[2].neg,  n.bc.src[2].abs);
   1104 				return fold_alu_op1(n);
   1105 			}
   1106 		}
   1107 
   1108 		if (n.bc.op == ALU_OP3_MULADD || n.bc.op == ALU_OP3_MULADD_IEEE) {
   1109 			unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
   1110 					ALU_OP2_MUL_IEEE : ALU_OP2_MUL;
   1111 
   1112 			if (isc1 && v0 == v2) {
   1113 				cv1.f += (n.bc.src[2].neg != n.bc.src[0].neg ? -1.0f : 1.0f);
   1114 				n.src[1] = sh.get_const_value(cv1);
   1115 				n.bc.src[1].neg = 0;
   1116 				n.bc.src[1].abs = 0;
   1117 				n.bc.set_op(op);
   1118 				n.src.resize(2);
   1119 				return fold_alu_op2(n);
   1120 			} else if (isc0 && v1 == v2) {
   1121 				cv0.f += (n.bc.src[2].neg != n.bc.src[1].neg ? -1.0f : 1.0f);
   1122 				n.src[0] = sh.get_const_value(cv0);
   1123 				n.bc.src[0].neg = 0;
   1124 				n.bc.src[0].abs = 0;
   1125 				n.bc.set_op(op);
   1126 				n.src.resize(2);
   1127 				return fold_alu_op2(n);
   1128 			}
   1129 		}
   1130 
   1131 		return false;
   1132 	}
   1133 
   1134 	apply_alu_dst_mod(n.bc, dv);
   1135 	assign_source(n.dst[0], get_const(dv));
   1136 	return true;
   1137 }
   1138 
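// Invert a comparison condition code; GE/GT have no LT/LE counterparts in
// the op tables below, so inverting them also requires swapping the source
// operands.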
   1139 unsigned invert_setcc_condition(unsigned cc, bool &swap_args) {
   1140 	unsigned ncc = 0;
   1141 
   1142 	switch (cc) {
   1143 	case AF_CC_E: ncc = AF_CC_NE; break;
   1144 	case AF_CC_NE: ncc = AF_CC_E; break;
   1145 	case AF_CC_GE: ncc = AF_CC_GT; swap_args = true; break;
   1146 	case AF_CC_GT: ncc = AF_CC_GE; swap_args = true; break;
   1147 	default:
   1148 		assert(!"unexpected condition code");
   1149 		break;
   1150 	}
   1151 	return ncc;
   1152 }
   1153 
   1154 unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) {
   1155 
   1156 	if (int_dst && cmp_type == AF_FLOAT_CMP) {
   1157 		switch (cc) {
   1158 		case AF_CC_E: return ALU_OP2_SETE_DX10;
   1159 		case AF_CC_NE: return ALU_OP2_SETNE_DX10;
   1160 		case AF_CC_GT: return ALU_OP2_SETGT_DX10;
   1161 		case AF_CC_GE: return ALU_OP2_SETGE_DX10;
   1162 		}
   1163 	} else {
   1164 
   1165 		switch(cmp_type) {
   1166 		case AF_FLOAT_CMP: {
   1167 			switch (cc) {
   1168 			case AF_CC_E: return ALU_OP2_SETE;
   1169 			case AF_CC_NE: return ALU_OP2_SETNE;
   1170 			case AF_CC_GT: return ALU_OP2_SETGT;
   1171 			case AF_CC_GE: return ALU_OP2_SETGE;
   1172 			}
   1173 			break;
   1174 		}
   1175 		case AF_INT_CMP: {
   1176 			switch (cc) {
   1177 			case AF_CC_E: return ALU_OP2_SETE_INT;
   1178 			case AF_CC_NE: return ALU_OP2_SETNE_INT;
   1179 			case AF_CC_GT: return ALU_OP2_SETGT_INT;
   1180 			case AF_CC_GE: return ALU_OP2_SETGE_INT;
   1181 			}
   1182 			break;
   1183 		}
   1184 		case AF_UINT_CMP: {
   1185 			switch (cc) {
   1186 			case AF_CC_E: return ALU_OP2_SETE_INT;
   1187 			case AF_CC_NE: return ALU_OP2_SETNE_INT;
   1188 			case AF_CC_GT: return ALU_OP2_SETGT_UINT;
   1189 			case AF_CC_GE: return ALU_OP2_SETGE_UINT;
   1190 			}
   1191 			break;
   1192 		}
   1193 		}
   1194 	}
   1195 
   1196 	assert(!"unexpected cc&cmp_type combination");
   1197 	return ~0u;
   1198 }
   1199 
   1200 unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) {
   1201 
   1202 	switch(cmp_type) {
   1203 	case AF_FLOAT_CMP: {
   1204 		switch (cc) {
   1205 		case AF_CC_E: return ALU_OP2_PRED_SETE;
   1206 		case AF_CC_NE: return ALU_OP2_PRED_SETNE;
   1207 		case AF_CC_GT: return ALU_OP2_PRED_SETGT;
   1208 		case AF_CC_GE: return ALU_OP2_PRED_SETGE;
   1209 		}
   1210 		break;
   1211 	}
   1212 	case AF_INT_CMP: {
   1213 		switch (cc) {
   1214 		case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
   1215 		case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
   1216 		case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT;
   1217 		case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT;
   1218 		}
   1219 		break;
   1220 	}
   1221 	case AF_UINT_CMP: {
   1222 		switch (cc) {
   1223 		case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
   1224 		case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
   1225 		case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT;
   1226 		case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT;
   1227 		}
   1228 		break;
   1229 	}
   1230 	}
   1231 
   1232 	assert(!"unexpected cc&cmp_type combination");
   1233 	return ~0u;
   1234 }
   1235 
   1236 unsigned get_killcc_op(unsigned cc, unsigned cmp_type) {
   1237 
   1238 	switch(cmp_type) {
   1239 	case AF_FLOAT_CMP: {
   1240 		switch (cc) {
   1241 		case AF_CC_E: return ALU_OP2_KILLE;
   1242 		case AF_CC_NE: return ALU_OP2_KILLNE;
   1243 		case AF_CC_GT: return ALU_OP2_KILLGT;
   1244 		case AF_CC_GE: return ALU_OP2_KILLGE;
   1245 		}
   1246 		break;
   1247 	}
   1248 	case AF_INT_CMP: {
   1249 		switch (cc) {
   1250 		case AF_CC_E: return ALU_OP2_KILLE_INT;
   1251 		case AF_CC_NE: return ALU_OP2_KILLNE_INT;
   1252 		case AF_CC_GT: return ALU_OP2_KILLGT_INT;
   1253 		case AF_CC_GE: return ALU_OP2_KILLGE_INT;
   1254 		}
   1255 		break;
   1256 	}
   1257 	case AF_UINT_CMP: {
   1258 		switch (cc) {
   1259 		case AF_CC_E: return ALU_OP2_KILLE_INT;
   1260 		case AF_CC_NE: return ALU_OP2_KILLNE_INT;
   1261 		case AF_CC_GT: return ALU_OP2_KILLGT_UINT;
   1262 		case AF_CC_GE: return ALU_OP2_KILLGE_UINT;
   1263 		}
   1264 		break;
   1265 	}
   1266 	}
   1267 
   1268 	assert(!"unexpected cc&cmp_type combination");
   1269 	return ~0u;
   1270 }
   1271 
   1272 unsigned get_cndcc_op(unsigned cc, unsigned cmp_type) {
   1273 
   1274 	switch(cmp_type) {
   1275 	case AF_FLOAT_CMP: {
   1276 		switch (cc) {
   1277 		case AF_CC_E: return ALU_OP3_CNDE;
   1278 		case AF_CC_GT: return ALU_OP3_CNDGT;
   1279 		case AF_CC_GE: return ALU_OP3_CNDGE;
   1280 		}
   1281 		break;
   1282 	}
   1283 	case AF_INT_CMP: {
   1284 		switch (cc) {
   1285 		case AF_CC_E: return ALU_OP3_CNDE_INT;
   1286 		case AF_CC_GT: return ALU_OP3_CNDGT_INT;
   1287 		case AF_CC_GE: return ALU_OP3_CNDGE_INT;
   1288 		}
   1289 		break;
   1290 	}
   1291 	}
   1292 
   1293 	assert(!"unexpected cc&cmp_type combination");
   1294 	return ~0u;
   1295 }
   1296 
   1297 
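// Rewrite a (cloned) PRED_SETcc node as the equivalent SETcc instruction
// (DX10 variant for float compares) with the inverted condition, swapping
// sources where needed, and stop updating the exec mask/predicate; the
// inversion presumably matches how get_select_value_for_em's result is
// consumed by the callers.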
   1298 void convert_predset_to_set(shader& sh, alu_node* a) {
   1299 
   1300 	unsigned flags = a->bc.op_ptr->flags;
   1301 	unsigned cc = flags & AF_CC_MASK;
   1302 	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
   1303 
   1304 	bool swap_args = false;
   1305 
   1306 	cc = invert_setcc_condition(cc, swap_args);
   1307 
   1308 	unsigned newop = get_setcc_op(cc, cmp_type, true);
   1309 
   1310 	a->dst.resize(1);
   1311 	a->bc.set_op(newop);
   1312 
   1313 	if (swap_args) {
   1314 		std::swap(a->src[0], a->src[1]);
   1315 		std::swap(a->bc.src[0], a->bc.src[1]);
   1316 	}
   1317 
   1318 	a->bc.update_exec_mask = 0;
   1319 	a->bc.update_pred = 0;
   1320 }
   1321 
   1322 } // namespace r600_sb
   1323