/*
 * Copyright 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_instructions.cpp
 *
 * Many GPUs lack native instructions for certain expression operations, and
 * must replace them with some other expression tree. This pass lowers some
 * of the most common cases, allowing the lowering code to be implemented once
 * rather than in each driver backend.
 *
 * Currently supported transformations:
 * - SUB_TO_ADD_NEG
 * - FDIV_TO_MUL_RCP
 * - DDIV_TO_MUL_RCP
 * - DIV_TO_MUL_RCP (sets both FDIV_TO_MUL_RCP and DDIV_TO_MUL_RCP)
 * - INT_DIV_TO_MUL_RCP
 * - EXP_TO_EXP2
 * - POW_TO_EXP2
 * - LOG_TO_LOG2
 * - MOD_TO_FLOOR
 * - LDEXP_TO_ARITH
 * - DFREXP_DLDEXP_TO_ARITH
 * - CARRY_TO_ARITH
 * - BORROW_TO_ARITH
 * - SAT_TO_CLAMP
 * - DOPS_TO_DFRAC
 *
 * SUB_TO_ADD_NEG:
 * ---------------
 * Breaks an ir_binop_sub expression down to add(op0, neg(op1))
 *
 * This simplifies expression reassociation, and for many backends
 * there is no subtract operation separate from adding the negation.
 * For backends with native subtract operations, they will probably
 * want to recognize add(op0, neg(op1)) or the other way around to
 * produce a subtract anyway.
 *
 * FDIV_TO_MUL_RCP, DDIV_TO_MUL_RCP, and INT_DIV_TO_MUL_RCP:
 * ---------------------------------------------------------
 * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
 *
 * Many GPUs don't have a divide instruction (945 and 965 included),
 * but they do have an RCP instruction to compute an approximate
 * reciprocal. By breaking the operation down, constant reciprocals
 * can get constant folded.
 *
 * FDIV_TO_MUL_RCP only lowers single-precision floating point division;
 * DDIV_TO_MUL_RCP only lowers double-precision floating point division.
 * DIV_TO_MUL_RCP is a convenience macro that sets both flags.
 * INT_DIV_TO_MUL_RCP handles the integer case, converting to and from floating
 * point so that RCP is possible.
 *
 * EXP_TO_EXP2 and LOG_TO_LOG2:
 * ----------------------------
 * Many GPUs don't have a base e log or exponent instruction, but they
 * do have base 2 versions, so this pass converts exp and log to exp2
 * and log2 operations.
 *
 * POW_TO_EXP2:
 * ------------
 * Many older GPUs don't have an x**y instruction. For these GPUs, convert
 * x**y to 2**(y * log2(x)).
82 * 83 * MOD_TO_FLOOR: 84 * ------------- 85 * Breaks an ir_binop_mod expression down to (op0 - op1 * floor(op0 / op1)) 86 * 87 * Many GPUs don't have a MOD instruction (945 and 965 included), and 88 * if we have to break it down like this anyway, it gives an 89 * opportunity to do things like constant fold the (1.0 / op1) easily. 90 * 91 * Note: before we used to implement this as op1 * fract(op / op1) but this 92 * implementation had significant precision errors. 93 * 94 * LDEXP_TO_ARITH: 95 * ------------- 96 * Converts ir_binop_ldexp to arithmetic and bit operations for float sources. 97 * 98 * DFREXP_DLDEXP_TO_ARITH: 99 * --------------- 100 * Converts ir_binop_ldexp, ir_unop_frexp_sig, and ir_unop_frexp_exp to 101 * arithmetic and bit ops for double arguments. 102 * 103 * CARRY_TO_ARITH: 104 * --------------- 105 * Converts ir_carry into (x + y) < x. 106 * 107 * BORROW_TO_ARITH: 108 * ---------------- 109 * Converts ir_borrow into (x < y). 110 * 111 * SAT_TO_CLAMP: 112 * ------------- 113 * Converts ir_unop_saturate into min(max(x, 0.0), 1.0) 114 * 115 * DOPS_TO_DFRAC: 116 * -------------- 117 * Converts double trunc, ceil, floor, round to fract 118 */ 119 120 #include "c99_math.h" 121 #include "program/prog_instruction.h" /* for swizzle */ 122 #include "compiler/glsl_types.h" 123 #include "ir.h" 124 #include "ir_builder.h" 125 #include "ir_optimization.h" 126 127 using namespace ir_builder; 128 129 namespace { 130 131 class lower_instructions_visitor : public ir_hierarchical_visitor { 132 public: 133 lower_instructions_visitor(unsigned lower) 134 : progress(false), lower(lower) { } 135 136 ir_visitor_status visit_leave(ir_expression *); 137 138 bool progress; 139 140 private: 141 unsigned lower; /** Bitfield of which operations to lower */ 142 143 void sub_to_add_neg(ir_expression *); 144 void div_to_mul_rcp(ir_expression *); 145 void int_div_to_mul_rcp(ir_expression *); 146 void mod_to_floor(ir_expression *); 147 void exp_to_exp2(ir_expression *); 148 void 
pow_to_exp2(ir_expression *); 149 void log_to_log2(ir_expression *); 150 void ldexp_to_arith(ir_expression *); 151 void dldexp_to_arith(ir_expression *); 152 void dfrexp_sig_to_arith(ir_expression *); 153 void dfrexp_exp_to_arith(ir_expression *); 154 void carry_to_arith(ir_expression *); 155 void borrow_to_arith(ir_expression *); 156 void sat_to_clamp(ir_expression *); 157 void double_dot_to_fma(ir_expression *); 158 void double_lrp(ir_expression *); 159 void dceil_to_dfrac(ir_expression *); 160 void dfloor_to_dfrac(ir_expression *); 161 void dround_even_to_dfrac(ir_expression *); 162 void dtrunc_to_dfrac(ir_expression *); 163 void dsign_to_csel(ir_expression *); 164 void bit_count_to_math(ir_expression *); 165 void extract_to_shifts(ir_expression *); 166 void insert_to_shifts(ir_expression *); 167 void reverse_to_shifts(ir_expression *ir); 168 void find_lsb_to_float_cast(ir_expression *ir); 169 void find_msb_to_float_cast(ir_expression *ir); 170 void imul_high_to_mul(ir_expression *ir); 171 172 ir_expression *_carry(operand a, operand b); 173 }; 174 175 } /* anonymous namespace */ 176 177 /** 178 * Determine if a particular type of lowering should occur 179 */ 180 #define lowering(x) (this->lower & x) 181 182 bool 183 lower_instructions(exec_list *instructions, unsigned what_to_lower) 184 { 185 lower_instructions_visitor v(what_to_lower); 186 187 visit_list_elements(&v, instructions); 188 return v.progress; 189 } 190 191 void 192 lower_instructions_visitor::sub_to_add_neg(ir_expression *ir) 193 { 194 ir->operation = ir_binop_add; 195 ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type, 196 ir->operands[1], NULL); 197 this->progress = true; 198 } 199 200 void 201 lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir) 202 { 203 assert(ir->operands[1]->type->is_float() || ir->operands[1]->type->is_double()); 204 205 /* New expression for the 1.0 / op1 */ 206 ir_rvalue *expr; 207 expr = new(ir) ir_expression(ir_unop_rcp, 208 
ir->operands[1]->type, 209 ir->operands[1]); 210 211 /* op0 / op1 -> op0 * (1.0 / op1) */ 212 ir->operation = ir_binop_mul; 213 ir->operands[1] = expr; 214 215 this->progress = true; 216 } 217 218 void 219 lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir) 220 { 221 assert(ir->operands[1]->type->is_integer()); 222 223 /* Be careful with integer division -- we need to do it as a 224 * float and re-truncate, since rcp(n > 1) of an integer would 225 * just be 0. 226 */ 227 ir_rvalue *op0, *op1; 228 const struct glsl_type *vec_type; 229 230 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 231 ir->operands[1]->type->vector_elements, 232 ir->operands[1]->type->matrix_columns); 233 234 if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) 235 op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL); 236 else 237 op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL); 238 239 op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL); 240 241 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 242 ir->operands[0]->type->vector_elements, 243 ir->operands[0]->type->matrix_columns); 244 245 if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) 246 op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL); 247 else 248 op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL); 249 250 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 251 ir->type->vector_elements, 252 ir->type->matrix_columns); 253 254 op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1); 255 256 if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) { 257 ir->operation = ir_unop_f2i; 258 ir->operands[0] = op0; 259 } else { 260 ir->operation = ir_unop_i2u; 261 ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0); 262 } 263 ir->operands[1] = NULL; 264 265 this->progress = true; 266 } 267 268 void 269 lower_instructions_visitor::exp_to_exp2(ir_expression *ir) 270 { 271 ir_constant *log2_e = new(ir) 
ir_constant(float(M_LOG2E)); 272 273 ir->operation = ir_unop_exp2; 274 ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type, 275 ir->operands[0], log2_e); 276 this->progress = true; 277 } 278 279 void 280 lower_instructions_visitor::pow_to_exp2(ir_expression *ir) 281 { 282 ir_expression *const log2_x = 283 new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type, 284 ir->operands[0]); 285 286 ir->operation = ir_unop_exp2; 287 ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type, 288 ir->operands[1], log2_x); 289 ir->operands[1] = NULL; 290 this->progress = true; 291 } 292 293 void 294 lower_instructions_visitor::log_to_log2(ir_expression *ir) 295 { 296 ir->operation = ir_binop_mul; 297 ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type, 298 ir->operands[0], NULL); 299 ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E)); 300 this->progress = true; 301 } 302 303 void 304 lower_instructions_visitor::mod_to_floor(ir_expression *ir) 305 { 306 ir_variable *x = new(ir) ir_variable(ir->operands[0]->type, "mod_x", 307 ir_var_temporary); 308 ir_variable *y = new(ir) ir_variable(ir->operands[1]->type, "mod_y", 309 ir_var_temporary); 310 this->base_ir->insert_before(x); 311 this->base_ir->insert_before(y); 312 313 ir_assignment *const assign_x = 314 new(ir) ir_assignment(new(ir) ir_dereference_variable(x), 315 ir->operands[0], NULL); 316 ir_assignment *const assign_y = 317 new(ir) ir_assignment(new(ir) ir_dereference_variable(y), 318 ir->operands[1], NULL); 319 320 this->base_ir->insert_before(assign_x); 321 this->base_ir->insert_before(assign_y); 322 323 ir_expression *const div_expr = 324 new(ir) ir_expression(ir_binop_div, x->type, 325 new(ir) ir_dereference_variable(x), 326 new(ir) ir_dereference_variable(y)); 327 328 /* Don't generate new IR that would need to be lowered in an additional 329 * pass. 
330 */ 331 if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float()) || 332 (lowering(DDIV_TO_MUL_RCP) && ir->type->is_double())) 333 div_to_mul_rcp(div_expr); 334 335 ir_expression *const floor_expr = 336 new(ir) ir_expression(ir_unop_floor, x->type, div_expr); 337 338 if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) 339 dfloor_to_dfrac(floor_expr); 340 341 ir_expression *const mul_expr = 342 new(ir) ir_expression(ir_binop_mul, 343 new(ir) ir_dereference_variable(y), 344 floor_expr); 345 346 ir->operation = ir_binop_sub; 347 ir->operands[0] = new(ir) ir_dereference_variable(x); 348 ir->operands[1] = mul_expr; 349 this->progress = true; 350 } 351 352 void 353 lower_instructions_visitor::ldexp_to_arith(ir_expression *ir) 354 { 355 /* Translates 356 * ir_binop_ldexp x exp 357 * into 358 * 359 * extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift); 360 * resulting_biased_exp = extracted_biased_exp + exp; 361 * 362 * if (resulting_biased_exp < 1 || x == 0.0f) { 363 * return copysign(0.0, x); 364 * } 365 * 366 * return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) | 367 * lshift(i2u(resulting_biased_exp), exp_shift)); 368 * 369 * which we can't actually implement as such, since the GLSL IR doesn't 370 * have vectorized if-statements. 
We actually implement it without branches 371 * using conditional-select: 372 * 373 * extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift); 374 * resulting_biased_exp = extracted_biased_exp + exp; 375 * 376 * is_not_zero_or_underflow = logic_and(nequal(x, 0.0f), 377 * gequal(resulting_biased_exp, 1); 378 * x = csel(is_not_zero_or_underflow, x, copysign(0.0f, x)); 379 * resulting_biased_exp = csel(is_not_zero_or_underflow, 380 * resulting_biased_exp, 0); 381 * 382 * return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) | 383 * lshift(i2u(resulting_biased_exp), exp_shift)); 384 */ 385 386 const unsigned vec_elem = ir->type->vector_elements; 387 388 /* Types */ 389 const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1); 390 const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); 391 392 /* Constants */ 393 ir_constant *zeroi = ir_constant::zero(ir, ivec); 394 395 ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem); 396 397 ir_constant *exp_shift = new(ir) ir_constant(23, vec_elem); 398 399 /* Temporary variables */ 400 ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary); 401 ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary); 402 403 ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x", 404 ir_var_temporary); 405 406 ir_variable *extracted_biased_exp = 407 new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary); 408 ir_variable *resulting_biased_exp = 409 new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary); 410 411 ir_variable *is_not_zero_or_underflow = 412 new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary); 413 414 ir_instruction &i = *base_ir; 415 416 /* Copy <x> and <exp> arguments. */ 417 i.insert_before(x); 418 i.insert_before(assign(x, ir->operands[0])); 419 i.insert_before(exp); 420 i.insert_before(assign(exp, ir->operands[1])); 421 422 /* Extract the biased exponent from <x>. 
*/ 423 i.insert_before(extracted_biased_exp); 424 i.insert_before(assign(extracted_biased_exp, 425 rshift(bitcast_f2i(abs(x)), exp_shift))); 426 427 i.insert_before(resulting_biased_exp); 428 i.insert_before(assign(resulting_biased_exp, 429 add(extracted_biased_exp, exp))); 430 431 /* Test if result is 0.0, subnormal, or underflow by checking if the 432 * resulting biased exponent would be less than 0x1. If so, the result is 433 * 0.0 with the sign of x. (Actually, invert the conditions so that 434 * immediate values are the second arguments, which is better for i965) 435 */ 436 i.insert_before(zero_sign_x); 437 i.insert_before(assign(zero_sign_x, 438 bitcast_u2f(bit_and(bitcast_f2u(x), sign_mask)))); 439 440 i.insert_before(is_not_zero_or_underflow); 441 i.insert_before(assign(is_not_zero_or_underflow, 442 logic_and(nequal(x, new(ir) ir_constant(0.0f, vec_elem)), 443 gequal(resulting_biased_exp, 444 new(ir) ir_constant(0x1, vec_elem))))); 445 i.insert_before(assign(x, csel(is_not_zero_or_underflow, 446 x, zero_sign_x))); 447 i.insert_before(assign(resulting_biased_exp, 448 csel(is_not_zero_or_underflow, 449 resulting_biased_exp, zeroi))); 450 451 /* We could test for overflows by checking if the resulting biased exponent 452 * would be greater than 0xFE. Turns out we don't need to because the GLSL 453 * spec says: 454 * 455 * "If this product is too large to be represented in the 456 * floating-point type, the result is undefined." 457 */ 458 459 ir_constant *exp_shift_clone = exp_shift->clone(ir, NULL); 460 461 /* Don't generate new IR that would need to be lowered in an additional 462 * pass. 
463 */ 464 if (!lowering(INSERT_TO_SHIFTS)) { 465 ir_constant *exp_width = new(ir) ir_constant(8, vec_elem); 466 ir->operation = ir_unop_bitcast_i2f; 467 ir->operands[0] = bitfield_insert(bitcast_f2i(x), resulting_biased_exp, 468 exp_shift_clone, exp_width); 469 ir->operands[1] = NULL; 470 } else { 471 ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x807fffffu, vec_elem); 472 ir->operation = ir_unop_bitcast_u2f; 473 ir->operands[0] = bit_or(bit_and(bitcast_f2u(x), sign_mantissa_mask), 474 lshift(i2u(resulting_biased_exp), exp_shift_clone)); 475 } 476 477 this->progress = true; 478 } 479 480 void 481 lower_instructions_visitor::dldexp_to_arith(ir_expression *ir) 482 { 483 /* See ldexp_to_arith for structure. Uses frexp_exp to extract the exponent 484 * from the significand. 485 */ 486 487 const unsigned vec_elem = ir->type->vector_elements; 488 489 /* Types */ 490 const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1); 491 const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); 492 493 /* Constants */ 494 ir_constant *zeroi = ir_constant::zero(ir, ivec); 495 496 ir_constant *sign_mask = new(ir) ir_constant(0x80000000u); 497 498 ir_constant *exp_shift = new(ir) ir_constant(20u); 499 ir_constant *exp_width = new(ir) ir_constant(11u); 500 ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem); 501 502 /* Temporary variables */ 503 ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary); 504 ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary); 505 506 ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x", 507 ir_var_temporary); 508 509 ir_variable *extracted_biased_exp = 510 new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary); 511 ir_variable *resulting_biased_exp = 512 new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary); 513 514 ir_variable *is_not_zero_or_underflow = 515 new(ir) ir_variable(bvec, "is_not_zero_or_underflow", 
ir_var_temporary); 516 517 ir_instruction &i = *base_ir; 518 519 /* Copy <x> and <exp> arguments. */ 520 i.insert_before(x); 521 i.insert_before(assign(x, ir->operands[0])); 522 i.insert_before(exp); 523 i.insert_before(assign(exp, ir->operands[1])); 524 525 ir_expression *frexp_exp = expr(ir_unop_frexp_exp, x); 526 if (lowering(DFREXP_DLDEXP_TO_ARITH)) 527 dfrexp_exp_to_arith(frexp_exp); 528 529 /* Extract the biased exponent from <x>. */ 530 i.insert_before(extracted_biased_exp); 531 i.insert_before(assign(extracted_biased_exp, add(frexp_exp, exp_bias))); 532 533 i.insert_before(resulting_biased_exp); 534 i.insert_before(assign(resulting_biased_exp, 535 add(extracted_biased_exp, exp))); 536 537 /* Test if result is 0.0, subnormal, or underflow by checking if the 538 * resulting biased exponent would be less than 0x1. If so, the result is 539 * 0.0 with the sign of x. (Actually, invert the conditions so that 540 * immediate values are the second arguments, which is better for i965) 541 * TODO: Implement in a vector fashion. 
542 */ 543 i.insert_before(zero_sign_x); 544 for (unsigned elem = 0; elem < vec_elem; elem++) { 545 ir_variable *unpacked = 546 new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary); 547 i.insert_before(unpacked); 548 i.insert_before( 549 assign(unpacked, 550 expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1)))); 551 i.insert_before(assign(unpacked, bit_and(swizzle_y(unpacked), sign_mask->clone(ir, NULL)), 552 WRITEMASK_Y)); 553 i.insert_before(assign(unpacked, ir_constant::zero(ir, glsl_type::uint_type), WRITEMASK_X)); 554 i.insert_before(assign(zero_sign_x, 555 expr(ir_unop_pack_double_2x32, unpacked), 556 1 << elem)); 557 } 558 i.insert_before(is_not_zero_or_underflow); 559 i.insert_before(assign(is_not_zero_or_underflow, 560 gequal(resulting_biased_exp, 561 new(ir) ir_constant(0x1, vec_elem)))); 562 i.insert_before(assign(x, csel(is_not_zero_or_underflow, 563 x, zero_sign_x))); 564 i.insert_before(assign(resulting_biased_exp, 565 csel(is_not_zero_or_underflow, 566 resulting_biased_exp, zeroi))); 567 568 /* We could test for overflows by checking if the resulting biased exponent 569 * would be greater than 0xFE. Turns out we don't need to because the GLSL 570 * spec says: 571 * 572 * "If this product is too large to be represented in the 573 * floating-point type, the result is undefined." 
574 */ 575 576 ir_rvalue *results[4] = {NULL}; 577 for (unsigned elem = 0; elem < vec_elem; elem++) { 578 ir_variable *unpacked = 579 new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary); 580 i.insert_before(unpacked); 581 i.insert_before( 582 assign(unpacked, 583 expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1)))); 584 585 ir_expression *bfi = bitfield_insert( 586 swizzle_y(unpacked), 587 i2u(swizzle(resulting_biased_exp, elem, 1)), 588 exp_shift->clone(ir, NULL), 589 exp_width->clone(ir, NULL)); 590 591 i.insert_before(assign(unpacked, bfi, WRITEMASK_Y)); 592 593 results[elem] = expr(ir_unop_pack_double_2x32, unpacked); 594 } 595 596 ir->operation = ir_quadop_vector; 597 ir->operands[0] = results[0]; 598 ir->operands[1] = results[1]; 599 ir->operands[2] = results[2]; 600 ir->operands[3] = results[3]; 601 602 /* Don't generate new IR that would need to be lowered in an additional 603 * pass. 604 */ 605 606 this->progress = true; 607 } 608 609 void 610 lower_instructions_visitor::dfrexp_sig_to_arith(ir_expression *ir) 611 { 612 const unsigned vec_elem = ir->type->vector_elements; 613 const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); 614 615 /* Double-precision floating-point values are stored as 616 * 1 sign bit; 617 * 11 exponent bits; 618 * 52 mantissa bits. 619 * 620 * We're just extracting the significand here, so we only need to modify 621 * the upper 32-bit uint. Unfortunately we must extract each double 622 * independently as there is no vector version of unpackDouble. 
623 */ 624 625 ir_instruction &i = *base_ir; 626 627 ir_variable *is_not_zero = 628 new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary); 629 ir_rvalue *results[4] = {NULL}; 630 631 ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem); 632 i.insert_before(is_not_zero); 633 i.insert_before( 634 assign(is_not_zero, 635 nequal(abs(ir->operands[0]->clone(ir, NULL)), dzero))); 636 637 /* TODO: Remake this as more vector-friendly when int64 support is 638 * available. 639 */ 640 for (unsigned elem = 0; elem < vec_elem; elem++) { 641 ir_constant *zero = new(ir) ir_constant(0u, 1); 642 ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x800fffffu, 1); 643 644 /* Exponent of double floating-point values in the range [0.5, 1.0). */ 645 ir_constant *exponent_value = new(ir) ir_constant(0x3fe00000u, 1); 646 647 ir_variable *bits = 648 new(ir) ir_variable(glsl_type::uint_type, "bits", ir_var_temporary); 649 ir_variable *unpacked = 650 new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary); 651 652 ir_rvalue *x = swizzle(ir->operands[0]->clone(ir, NULL), elem, 1); 653 654 i.insert_before(bits); 655 i.insert_before(unpacked); 656 i.insert_before(assign(unpacked, expr(ir_unop_unpack_double_2x32, x))); 657 658 /* Manipulate the high uint to remove the exponent and replace it with 659 * either the default exponent or zero. 
660 */ 661 i.insert_before(assign(bits, swizzle_y(unpacked))); 662 i.insert_before(assign(bits, bit_and(bits, sign_mantissa_mask))); 663 i.insert_before(assign(bits, bit_or(bits, 664 csel(swizzle(is_not_zero, elem, 1), 665 exponent_value, 666 zero)))); 667 i.insert_before(assign(unpacked, bits, WRITEMASK_Y)); 668 results[elem] = expr(ir_unop_pack_double_2x32, unpacked); 669 } 670 671 /* Put the dvec back together */ 672 ir->operation = ir_quadop_vector; 673 ir->operands[0] = results[0]; 674 ir->operands[1] = results[1]; 675 ir->operands[2] = results[2]; 676 ir->operands[3] = results[3]; 677 678 this->progress = true; 679 } 680 681 void 682 lower_instructions_visitor::dfrexp_exp_to_arith(ir_expression *ir) 683 { 684 const unsigned vec_elem = ir->type->vector_elements; 685 const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); 686 const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 1); 687 688 /* Double-precision floating-point values are stored as 689 * 1 sign bit; 690 * 11 exponent bits; 691 * 52 mantissa bits. 692 * 693 * We're just extracting the exponent here, so we only care about the upper 694 * 32-bit uint. 695 */ 696 697 ir_instruction &i = *base_ir; 698 699 ir_variable *is_not_zero = 700 new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary); 701 ir_variable *high_words = 702 new(ir) ir_variable(uvec, "high_words", ir_var_temporary); 703 ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem); 704 ir_constant *izero = new(ir) ir_constant(0, vec_elem); 705 706 ir_rvalue *absval = abs(ir->operands[0]); 707 708 i.insert_before(is_not_zero); 709 i.insert_before(high_words); 710 i.insert_before(assign(is_not_zero, nequal(absval->clone(ir, NULL), dzero))); 711 712 /* Extract all of the upper uints. 
*/ 713 for (unsigned elem = 0; elem < vec_elem; elem++) { 714 ir_rvalue *x = swizzle(absval->clone(ir, NULL), elem, 1); 715 716 i.insert_before(assign(high_words, 717 swizzle_y(expr(ir_unop_unpack_double_2x32, x)), 718 1 << elem)); 719 720 } 721 ir_constant *exponent_shift = new(ir) ir_constant(20, vec_elem); 722 ir_constant *exponent_bias = new(ir) ir_constant(-1022, vec_elem); 723 724 /* For non-zero inputs, shift the exponent down and apply bias. */ 725 ir->operation = ir_triop_csel; 726 ir->operands[0] = new(ir) ir_dereference_variable(is_not_zero); 727 ir->operands[1] = add(exponent_bias, u2i(rshift(high_words, exponent_shift))); 728 ir->operands[2] = izero; 729 730 this->progress = true; 731 } 732 733 void 734 lower_instructions_visitor::carry_to_arith(ir_expression *ir) 735 { 736 /* Translates 737 * ir_binop_carry x y 738 * into 739 * sum = ir_binop_add x y 740 * bcarry = ir_binop_less sum x 741 * carry = ir_unop_b2i bcarry 742 */ 743 744 ir_rvalue *x_clone = ir->operands[0]->clone(ir, NULL); 745 ir->operation = ir_unop_i2u; 746 ir->operands[0] = b2i(less(add(ir->operands[0], ir->operands[1]), x_clone)); 747 ir->operands[1] = NULL; 748 749 this->progress = true; 750 } 751 752 void 753 lower_instructions_visitor::borrow_to_arith(ir_expression *ir) 754 { 755 /* Translates 756 * ir_binop_borrow x y 757 * into 758 * bcarry = ir_binop_less x y 759 * carry = ir_unop_b2i bcarry 760 */ 761 762 ir->operation = ir_unop_i2u; 763 ir->operands[0] = b2i(less(ir->operands[0], ir->operands[1])); 764 ir->operands[1] = NULL; 765 766 this->progress = true; 767 } 768 769 void 770 lower_instructions_visitor::sat_to_clamp(ir_expression *ir) 771 { 772 /* Translates 773 * ir_unop_saturate x 774 * into 775 * ir_binop_min (ir_binop_max(x, 0.0), 1.0) 776 */ 777 778 ir->operation = ir_binop_min; 779 ir->operands[0] = new(ir) ir_expression(ir_binop_max, ir->operands[0]->type, 780 ir->operands[0], 781 new(ir) ir_constant(0.0f)); 782 ir->operands[1] = new(ir) ir_constant(1.0f); 783 784 
this->progress = true; 785 } 786 787 void 788 lower_instructions_visitor::double_dot_to_fma(ir_expression *ir) 789 { 790 ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type->get_base_type(), "dot_res", 791 ir_var_temporary); 792 this->base_ir->insert_before(temp); 793 794 int nc = ir->operands[0]->type->components(); 795 for (int i = nc - 1; i >= 1; i--) { 796 ir_assignment *assig; 797 if (i == (nc - 1)) { 798 assig = assign(temp, mul(swizzle(ir->operands[0]->clone(ir, NULL), i, 1), 799 swizzle(ir->operands[1]->clone(ir, NULL), i, 1))); 800 } else { 801 assig = assign(temp, fma(swizzle(ir->operands[0]->clone(ir, NULL), i, 1), 802 swizzle(ir->operands[1]->clone(ir, NULL), i, 1), 803 temp)); 804 } 805 this->base_ir->insert_before(assig); 806 } 807 808 ir->operation = ir_triop_fma; 809 ir->operands[0] = swizzle(ir->operands[0], 0, 1); 810 ir->operands[1] = swizzle(ir->operands[1], 0, 1); 811 ir->operands[2] = new(ir) ir_dereference_variable(temp); 812 813 this->progress = true; 814 815 } 816 817 void 818 lower_instructions_visitor::double_lrp(ir_expression *ir) 819 { 820 int swizval; 821 ir_rvalue *op0 = ir->operands[0], *op2 = ir->operands[2]; 822 ir_constant *one = new(ir) ir_constant(1.0, op2->type->vector_elements); 823 824 switch (op2->type->vector_elements) { 825 case 1: 826 swizval = SWIZZLE_XXXX; 827 break; 828 default: 829 assert(op0->type->vector_elements == op2->type->vector_elements); 830 swizval = SWIZZLE_XYZW; 831 break; 832 } 833 834 ir->operation = ir_triop_fma; 835 ir->operands[0] = swizzle(op2, swizval, op0->type->vector_elements); 836 ir->operands[2] = mul(sub(one, op2->clone(ir, NULL)), op0); 837 838 this->progress = true; 839 } 840 841 void 842 lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir) 843 { 844 /* 845 * frtemp = frac(x); 846 * temp = sub(x, frtemp); 847 * result = temp + ((frtemp != 0.0) ? 
1.0 : 0.0); 848 */ 849 ir_instruction &i = *base_ir; 850 ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements); 851 ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements); 852 ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp", 853 ir_var_temporary); 854 855 i.insert_before(frtemp); 856 i.insert_before(assign(frtemp, fract(ir->operands[0]))); 857 858 ir->operation = ir_binop_add; 859 ir->operands[0] = sub(ir->operands[0]->clone(ir, NULL), frtemp); 860 ir->operands[1] = csel(nequal(frtemp, zero), one, zero->clone(ir, NULL)); 861 862 this->progress = true; 863 } 864 865 void 866 lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir) 867 { 868 /* 869 * frtemp = frac(x); 870 * result = sub(x, frtemp); 871 */ 872 ir->operation = ir_binop_sub; 873 ir->operands[1] = fract(ir->operands[0]->clone(ir, NULL)); 874 875 this->progress = true; 876 } 877 void 878 lower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir) 879 { 880 /* 881 * insane but works 882 * temp = x + 0.5; 883 * frtemp = frac(temp); 884 * t2 = sub(temp, frtemp); 885 * if (frac(x) == 0.5) 886 * result = frac(t2 * 0.5) == 0 ? 
t2 : t2 - 1; 887 * else 888 * result = t2; 889 890 */ 891 ir_instruction &i = *base_ir; 892 ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp", 893 ir_var_temporary); 894 ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp", 895 ir_var_temporary); 896 ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2", 897 ir_var_temporary); 898 ir_constant *p5 = new(ir) ir_constant(0.5, ir->operands[0]->type->vector_elements); 899 ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements); 900 ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements); 901 902 i.insert_before(temp); 903 i.insert_before(assign(temp, add(ir->operands[0], p5))); 904 905 i.insert_before(frtemp); 906 i.insert_before(assign(frtemp, fract(temp))); 907 908 i.insert_before(t2); 909 i.insert_before(assign(t2, sub(temp, frtemp))); 910 911 ir->operation = ir_triop_csel; 912 ir->operands[0] = equal(fract(ir->operands[0]->clone(ir, NULL)), 913 p5->clone(ir, NULL)); 914 ir->operands[1] = csel(equal(fract(mul(t2, p5->clone(ir, NULL))), 915 zero), 916 t2, 917 sub(t2, one)); 918 ir->operands[2] = new(ir) ir_dereference_variable(t2); 919 920 this->progress = true; 921 } 922 923 void 924 lower_instructions_visitor::dtrunc_to_dfrac(ir_expression *ir) 925 { 926 /* 927 * frtemp = frac(x); 928 * temp = sub(x, frtemp); 929 * result = x >= 0 ? temp : temp + (frtemp == 0.0) ? 
0 : 1; 930 */ 931 ir_rvalue *arg = ir->operands[0]; 932 ir_instruction &i = *base_ir; 933 934 ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements); 935 ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements); 936 ir_variable *frtemp = new(ir) ir_variable(arg->type, "frtemp", 937 ir_var_temporary); 938 ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp", 939 ir_var_temporary); 940 941 i.insert_before(frtemp); 942 i.insert_before(assign(frtemp, fract(arg))); 943 i.insert_before(temp); 944 i.insert_before(assign(temp, sub(arg->clone(ir, NULL), frtemp))); 945 946 ir->operation = ir_triop_csel; 947 ir->operands[0] = gequal(arg->clone(ir, NULL), zero); 948 ir->operands[1] = new (ir) ir_dereference_variable(temp); 949 ir->operands[2] = add(temp, 950 csel(equal(frtemp, zero->clone(ir, NULL)), 951 zero->clone(ir, NULL), 952 one)); 953 954 this->progress = true; 955 } 956 957 void 958 lower_instructions_visitor::dsign_to_csel(ir_expression *ir) 959 { 960 /* 961 * temp = x > 0.0 ? 1.0 : 0.0; 962 * result = x < 0.0 ? 
-1.0 : temp; 963 */ 964 ir_rvalue *arg = ir->operands[0]; 965 ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements); 966 ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements); 967 ir_constant *neg_one = new(ir) ir_constant(-1.0, arg->type->vector_elements); 968 969 ir->operation = ir_triop_csel; 970 ir->operands[0] = less(arg->clone(ir, NULL), 971 zero->clone(ir, NULL)); 972 ir->operands[1] = neg_one; 973 ir->operands[2] = csel(greater(arg, zero), 974 one, 975 zero->clone(ir, NULL)); 976 977 this->progress = true; 978 } 979 980 void 981 lower_instructions_visitor::bit_count_to_math(ir_expression *ir) 982 { 983 /* For more details, see: 984 * 985 * http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetPaallel 986 */ 987 const unsigned elements = ir->operands[0]->type->vector_elements; 988 ir_variable *temp = new(ir) ir_variable(glsl_type::uvec(elements), "temp", 989 ir_var_temporary); 990 ir_constant *c55555555 = new(ir) ir_constant(0x55555555u); 991 ir_constant *c33333333 = new(ir) ir_constant(0x33333333u); 992 ir_constant *c0F0F0F0F = new(ir) ir_constant(0x0F0F0F0Fu); 993 ir_constant *c01010101 = new(ir) ir_constant(0x01010101u); 994 ir_constant *c1 = new(ir) ir_constant(1u); 995 ir_constant *c2 = new(ir) ir_constant(2u); 996 ir_constant *c4 = new(ir) ir_constant(4u); 997 ir_constant *c24 = new(ir) ir_constant(24u); 998 999 base_ir->insert_before(temp); 1000 1001 if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) { 1002 base_ir->insert_before(assign(temp, ir->operands[0])); 1003 } else { 1004 assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); 1005 base_ir->insert_before(assign(temp, i2u(ir->operands[0]))); 1006 } 1007 1008 /* temp = temp - ((temp >> 1) & 0x55555555u); */ 1009 base_ir->insert_before(assign(temp, sub(temp, bit_and(rshift(temp, c1), 1010 c55555555)))); 1011 1012 /* temp = (temp & 0x33333333u) + ((temp >> 2) & 0x33333333u); */ 1013 base_ir->insert_before(assign(temp, add(bit_and(temp, 
c33333333), 1014 bit_and(rshift(temp, c2), 1015 c33333333->clone(ir, NULL))))); 1016 1017 /* int(((temp + (temp >> 4) & 0xF0F0F0Fu) * 0x1010101u) >> 24); */ 1018 ir->operation = ir_unop_u2i; 1019 ir->operands[0] = rshift(mul(bit_and(add(temp, rshift(temp, c4)), c0F0F0F0F), 1020 c01010101), 1021 c24); 1022 1023 this->progress = true; 1024 } 1025 1026 void 1027 lower_instructions_visitor::extract_to_shifts(ir_expression *ir) 1028 { 1029 ir_variable *bits = 1030 new(ir) ir_variable(ir->operands[0]->type, "bits", ir_var_temporary); 1031 1032 base_ir->insert_before(bits); 1033 base_ir->insert_before(assign(bits, ir->operands[2])); 1034 1035 if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) { 1036 ir_constant *c1 = 1037 new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements); 1038 ir_constant *c32 = 1039 new(ir) ir_constant(32u, ir->operands[0]->type->vector_elements); 1040 ir_constant *cFFFFFFFF = 1041 new(ir) ir_constant(0xFFFFFFFFu, ir->operands[0]->type->vector_elements); 1042 1043 /* At least some hardware treats (x << y) as (x << (y%32)). This means 1044 * we'd get a mask of 0 when bits is 32. Special case it. 1045 * 1046 * mask = bits == 32 ? 0xffffffff : (1u << bits) - 1u; 1047 */ 1048 ir_expression *mask = csel(equal(bits, c32), 1049 cFFFFFFFF, 1050 sub(lshift(c1, bits), c1->clone(ir, NULL))); 1051 1052 /* Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: 1053 * 1054 * If bits is zero, the result will be zero. 1055 * 1056 * Since (1 << 0) - 1 == 0, we don't need to bother with the conditional 1057 * select as in the signed integer case. 
1058 * 1059 * (value >> offset) & mask; 1060 */ 1061 ir->operation = ir_binop_bit_and; 1062 ir->operands[0] = rshift(ir->operands[0], ir->operands[1]); 1063 ir->operands[1] = mask; 1064 ir->operands[2] = NULL; 1065 } else { 1066 ir_constant *c0 = 1067 new(ir) ir_constant(int(0), ir->operands[0]->type->vector_elements); 1068 ir_constant *c32 = 1069 new(ir) ir_constant(int(32), ir->operands[0]->type->vector_elements); 1070 ir_variable *temp = 1071 new(ir) ir_variable(ir->operands[0]->type, "temp", ir_var_temporary); 1072 1073 /* temp = 32 - bits; */ 1074 base_ir->insert_before(temp); 1075 base_ir->insert_before(assign(temp, sub(c32, bits))); 1076 1077 /* expr = value << (temp - offset)) >> temp; */ 1078 ir_expression *expr = 1079 rshift(lshift(ir->operands[0], sub(temp, ir->operands[1])), temp); 1080 1081 /* Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: 1082 * 1083 * If bits is zero, the result will be zero. 1084 * 1085 * Due to the (x << (y%32)) behavior mentioned before, the (value << 1086 * (32-0)) doesn't "erase" all of the data as we would like, so finish 1087 * up with: 1088 * 1089 * (bits == 0) ? 
0 : e; 1090 */ 1091 ir->operation = ir_triop_csel; 1092 ir->operands[0] = equal(c0, bits); 1093 ir->operands[1] = c0->clone(ir, NULL); 1094 ir->operands[2] = expr; 1095 } 1096 1097 this->progress = true; 1098 } 1099 1100 void 1101 lower_instructions_visitor::insert_to_shifts(ir_expression *ir) 1102 { 1103 ir_constant *c1; 1104 ir_constant *c32; 1105 ir_constant *cFFFFFFFF; 1106 ir_variable *offset = 1107 new(ir) ir_variable(ir->operands[0]->type, "offset", ir_var_temporary); 1108 ir_variable *bits = 1109 new(ir) ir_variable(ir->operands[0]->type, "bits", ir_var_temporary); 1110 ir_variable *mask = 1111 new(ir) ir_variable(ir->operands[0]->type, "mask", ir_var_temporary); 1112 1113 if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) { 1114 c1 = new(ir) ir_constant(int(1), ir->operands[0]->type->vector_elements); 1115 c32 = new(ir) ir_constant(int(32), ir->operands[0]->type->vector_elements); 1116 cFFFFFFFF = new(ir) ir_constant(int(0xFFFFFFFF), ir->operands[0]->type->vector_elements); 1117 } else { 1118 assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); 1119 1120 c1 = new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements); 1121 c32 = new(ir) ir_constant(32u, ir->operands[0]->type->vector_elements); 1122 cFFFFFFFF = new(ir) ir_constant(0xFFFFFFFFu, ir->operands[0]->type->vector_elements); 1123 } 1124 1125 base_ir->insert_before(offset); 1126 base_ir->insert_before(assign(offset, ir->operands[2])); 1127 1128 base_ir->insert_before(bits); 1129 base_ir->insert_before(assign(bits, ir->operands[3])); 1130 1131 /* At least some hardware treats (x << y) as (x << (y%32)). This means 1132 * we'd get a mask of 0 when bits is 32. Special case it. 1133 * 1134 * mask = (bits == 32 ? 
0xffffffff : (1u << bits) - 1u) << offset; 1135 * 1136 * Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: 1137 * 1138 * The result will be undefined if offset or bits is negative, or if the 1139 * sum of offset and bits is greater than the number of bits used to 1140 * store the operand. 1141 * 1142 * Since it's undefined, there are a couple other ways this could be 1143 * implemented. The other way that was considered was to put the csel 1144 * around the whole thing: 1145 * 1146 * final_result = bits == 32 ? insert : ... ; 1147 */ 1148 base_ir->insert_before(mask); 1149 1150 base_ir->insert_before(assign(mask, csel(equal(bits, c32), 1151 cFFFFFFFF, 1152 lshift(sub(lshift(c1, bits), 1153 c1->clone(ir, NULL)), 1154 offset)))); 1155 1156 /* (base & ~mask) | ((insert << offset) & mask) */ 1157 ir->operation = ir_binop_bit_or; 1158 ir->operands[0] = bit_and(ir->operands[0], bit_not(mask)); 1159 ir->operands[1] = bit_and(lshift(ir->operands[1], offset), mask); 1160 ir->operands[2] = NULL; 1161 ir->operands[3] = NULL; 1162 1163 this->progress = true; 1164 } 1165 1166 void 1167 lower_instructions_visitor::reverse_to_shifts(ir_expression *ir) 1168 { 1169 /* For more details, see: 1170 * 1171 * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel 1172 */ 1173 ir_constant *c1 = 1174 new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements); 1175 ir_constant *c2 = 1176 new(ir) ir_constant(2u, ir->operands[0]->type->vector_elements); 1177 ir_constant *c4 = 1178 new(ir) ir_constant(4u, ir->operands[0]->type->vector_elements); 1179 ir_constant *c8 = 1180 new(ir) ir_constant(8u, ir->operands[0]->type->vector_elements); 1181 ir_constant *c16 = 1182 new(ir) ir_constant(16u, ir->operands[0]->type->vector_elements); 1183 ir_constant *c33333333 = 1184 new(ir) ir_constant(0x33333333u, ir->operands[0]->type->vector_elements); 1185 ir_constant *c55555555 = 1186 new(ir) ir_constant(0x55555555u, ir->operands[0]->type->vector_elements); 1187 ir_constant 
*c0F0F0F0F = 1188 new(ir) ir_constant(0x0F0F0F0Fu, ir->operands[0]->type->vector_elements); 1189 ir_constant *c00FF00FF = 1190 new(ir) ir_constant(0x00FF00FFu, ir->operands[0]->type->vector_elements); 1191 ir_variable *temp = 1192 new(ir) ir_variable(glsl_type::uvec(ir->operands[0]->type->vector_elements), 1193 "temp", ir_var_temporary); 1194 ir_instruction &i = *base_ir; 1195 1196 i.insert_before(temp); 1197 1198 if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) { 1199 i.insert_before(assign(temp, ir->operands[0])); 1200 } else { 1201 assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); 1202 i.insert_before(assign(temp, i2u(ir->operands[0]))); 1203 } 1204 1205 /* Swap odd and even bits. 1206 * 1207 * temp = ((temp >> 1) & 0x55555555u) | ((temp & 0x55555555u) << 1); 1208 */ 1209 i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c1), c55555555), 1210 lshift(bit_and(temp, c55555555->clone(ir, NULL)), 1211 c1->clone(ir, NULL))))); 1212 /* Swap consecutive pairs. 1213 * 1214 * temp = ((temp >> 2) & 0x33333333u) | ((temp & 0x33333333u) << 2); 1215 */ 1216 i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c2), c33333333), 1217 lshift(bit_and(temp, c33333333->clone(ir, NULL)), 1218 c2->clone(ir, NULL))))); 1219 1220 /* Swap nibbles. 1221 * 1222 * temp = ((temp >> 4) & 0x0F0F0F0Fu) | ((temp & 0x0F0F0F0Fu) << 4); 1223 */ 1224 i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c4), c0F0F0F0F), 1225 lshift(bit_and(temp, c0F0F0F0F->clone(ir, NULL)), 1226 c4->clone(ir, NULL))))); 1227 1228 /* The last step is, basically, bswap. Swap the bytes, then swap the 1229 * words. When this code is run through GCC on x86, it does generate a 1230 * bswap instruction. 
1231 * 1232 * temp = ((temp >> 8) & 0x00FF00FFu) | ((temp & 0x00FF00FFu) << 8); 1233 * temp = ( temp >> 16 ) | ( temp << 16); 1234 */ 1235 i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c8), c00FF00FF), 1236 lshift(bit_and(temp, c00FF00FF->clone(ir, NULL)), 1237 c8->clone(ir, NULL))))); 1238 1239 if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) { 1240 ir->operation = ir_binop_bit_or; 1241 ir->operands[0] = rshift(temp, c16); 1242 ir->operands[1] = lshift(temp, c16->clone(ir, NULL)); 1243 } else { 1244 ir->operation = ir_unop_u2i; 1245 ir->operands[0] = bit_or(rshift(temp, c16), 1246 lshift(temp, c16->clone(ir, NULL))); 1247 } 1248 1249 this->progress = true; 1250 } 1251 1252 void 1253 lower_instructions_visitor::find_lsb_to_float_cast(ir_expression *ir) 1254 { 1255 /* For more details, see: 1256 * 1257 * http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightFloatCast 1258 */ 1259 const unsigned elements = ir->operands[0]->type->vector_elements; 1260 ir_constant *c0 = new(ir) ir_constant(unsigned(0), elements); 1261 ir_constant *cminus1 = new(ir) ir_constant(int(-1), elements); 1262 ir_constant *c23 = new(ir) ir_constant(int(23), elements); 1263 ir_constant *c7F = new(ir) ir_constant(int(0x7F), elements); 1264 ir_variable *temp = 1265 new(ir) ir_variable(glsl_type::ivec(elements), "temp", ir_var_temporary); 1266 ir_variable *lsb_only = 1267 new(ir) ir_variable(glsl_type::uvec(elements), "lsb_only", ir_var_temporary); 1268 ir_variable *as_float = 1269 new(ir) ir_variable(glsl_type::vec(elements), "as_float", ir_var_temporary); 1270 ir_variable *lsb = 1271 new(ir) ir_variable(glsl_type::ivec(elements), "lsb", ir_var_temporary); 1272 1273 ir_instruction &i = *base_ir; 1274 1275 i.insert_before(temp); 1276 1277 if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) { 1278 i.insert_before(assign(temp, ir->operands[0])); 1279 } else { 1280 assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); 1281 i.insert_before(assign(temp, 
u2i(ir->operands[0]))); 1282 } 1283 1284 /* The int-to-float conversion is lossless because (value & -value) is 1285 * either a power of two or zero. We don't use the result in the zero 1286 * case. The uint() cast is necessary so that 0x80000000 does not 1287 * generate a negative value. 1288 * 1289 * uint lsb_only = uint(value & -value); 1290 * float as_float = float(lsb_only); 1291 */ 1292 i.insert_before(lsb_only); 1293 i.insert_before(assign(lsb_only, i2u(bit_and(temp, neg(temp))))); 1294 1295 i.insert_before(as_float); 1296 i.insert_before(assign(as_float, u2f(lsb_only))); 1297 1298 /* This is basically an open-coded frexp. Implementations that have a 1299 * native frexp instruction would be better served by that. This is 1300 * optimized versus a full-featured open-coded implementation in two ways: 1301 * 1302 * - We don't care about a correct result from subnormal numbers (including 1303 * 0.0), so the raw exponent can always be safely unbiased. 1304 * 1305 * - The value cannot be negative, so it does not need to be masked off to 1306 * extract the exponent. 1307 * 1308 * int lsb = (floatBitsToInt(as_float) >> 23) - 0x7f; 1309 */ 1310 i.insert_before(lsb); 1311 i.insert_before(assign(lsb, sub(rshift(bitcast_f2i(as_float), c23), c7F))); 1312 1313 /* Use lsb_only in the comparison instead of temp so that the & (far above) 1314 * can possibly generate the result without an explicit comparison. 1315 * 1316 * (lsb_only == 0) ? -1 : lsb; 1317 * 1318 * Since our input values are all integers, the unbiased exponent must not 1319 * be negative. It will only be negative (-0x7f, in fact) if lsb_only is 1320 * 0. Instead of using (lsb_only == 0), we could use (lsb >= 0). Which is 1321 * better is likely GPU dependent. Either way, the difference should be 1322 * small. 
1323 */ 1324 ir->operation = ir_triop_csel; 1325 ir->operands[0] = equal(lsb_only, c0); 1326 ir->operands[1] = cminus1; 1327 ir->operands[2] = new(ir) ir_dereference_variable(lsb); 1328 1329 this->progress = true; 1330 } 1331 1332 void 1333 lower_instructions_visitor::find_msb_to_float_cast(ir_expression *ir) 1334 { 1335 /* For more details, see: 1336 * 1337 * http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightFloatCast 1338 */ 1339 const unsigned elements = ir->operands[0]->type->vector_elements; 1340 ir_constant *c0 = new(ir) ir_constant(int(0), elements); 1341 ir_constant *cminus1 = new(ir) ir_constant(int(-1), elements); 1342 ir_constant *c23 = new(ir) ir_constant(int(23), elements); 1343 ir_constant *c7F = new(ir) ir_constant(int(0x7F), elements); 1344 ir_constant *c000000FF = new(ir) ir_constant(0x000000FFu, elements); 1345 ir_constant *cFFFFFF00 = new(ir) ir_constant(0xFFFFFF00u, elements); 1346 ir_variable *temp = 1347 new(ir) ir_variable(glsl_type::uvec(elements), "temp", ir_var_temporary); 1348 ir_variable *as_float = 1349 new(ir) ir_variable(glsl_type::vec(elements), "as_float", ir_var_temporary); 1350 ir_variable *msb = 1351 new(ir) ir_variable(glsl_type::ivec(elements), "msb", ir_var_temporary); 1352 1353 ir_instruction &i = *base_ir; 1354 1355 i.insert_before(temp); 1356 1357 if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) { 1358 i.insert_before(assign(temp, ir->operands[0])); 1359 } else { 1360 assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); 1361 1362 /* findMSB(uint(abs(some_int))) almost always does the right thing. 1363 * There are two problem values: 1364 * 1365 * * 0x80000000. Since abs(0x80000000) == 0x80000000, findMSB returns 1366 * 31. However, findMSB(int(0x80000000)) == 30. 1367 * 1368 * * 0xffffffff. Since abs(0xffffffff) == 1, findMSB returns 1369 * 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: 1370 * 1371 * For a value of zero or negative one, -1 will be returned. 
1372 * 1373 * For all negative number cases, including 0x80000000 and 0xffffffff, 1374 * the correct value is obtained from findMSB if instead of negating the 1375 * (already negative) value the logical-not is used. A conditonal 1376 * logical-not can be achieved in two instructions. 1377 */ 1378 ir_variable *as_int = 1379 new(ir) ir_variable(glsl_type::ivec(elements), "as_int", ir_var_temporary); 1380 ir_constant *c31 = new(ir) ir_constant(int(31), elements); 1381 1382 i.insert_before(as_int); 1383 i.insert_before(assign(as_int, ir->operands[0])); 1384 i.insert_before(assign(temp, i2u(expr(ir_binop_bit_xor, 1385 as_int, 1386 rshift(as_int, c31))))); 1387 } 1388 1389 /* The int-to-float conversion is lossless because bits are conditionally 1390 * masked off the bottom of temp to ensure the value has at most 24 bits of 1391 * data or is zero. We don't use the result in the zero case. The uint() 1392 * cast is necessary so that 0x80000000 does not generate a negative value. 1393 * 1394 * float as_float = float(temp > 255 ? temp & ~255 : temp); 1395 */ 1396 i.insert_before(as_float); 1397 i.insert_before(assign(as_float, u2f(csel(greater(temp, c000000FF), 1398 bit_and(temp, cFFFFFF00), 1399 temp)))); 1400 1401 /* This is basically an open-coded frexp. Implementations that have a 1402 * native frexp instruction would be better served by that. This is 1403 * optimized versus a full-featured open-coded implementation in two ways: 1404 * 1405 * - We don't care about a correct result from subnormal numbers (including 1406 * 0.0), so the raw exponent can always be safely unbiased. 1407 * 1408 * - The value cannot be negative, so it does not need to be masked off to 1409 * extract the exponent. 
1410 * 1411 * int msb = (floatBitsToInt(as_float) >> 23) - 0x7f; 1412 */ 1413 i.insert_before(msb); 1414 i.insert_before(assign(msb, sub(rshift(bitcast_f2i(as_float), c23), c7F))); 1415 1416 /* Use msb in the comparison instead of temp so that the subtract can 1417 * possibly generate the result without an explicit comparison. 1418 * 1419 * (msb < 0) ? -1 : msb; 1420 * 1421 * Since our input values are all integers, the unbiased exponent must not 1422 * be negative. It will only be negative (-0x7f, in fact) if temp is 0. 1423 */ 1424 ir->operation = ir_triop_csel; 1425 ir->operands[0] = less(msb, c0); 1426 ir->operands[1] = cminus1; 1427 ir->operands[2] = new(ir) ir_dereference_variable(msb); 1428 1429 this->progress = true; 1430 } 1431 1432 ir_expression * 1433 lower_instructions_visitor::_carry(operand a, operand b) 1434 { 1435 if (lowering(CARRY_TO_ARITH)) 1436 return i2u(b2i(less(add(a, b), 1437 a.val->clone(ralloc_parent(a.val), NULL)))); 1438 else 1439 return carry(a, b); 1440 } 1441 1442 void 1443 lower_instructions_visitor::imul_high_to_mul(ir_expression *ir) 1444 { 1445 /* ABCD 1446 * * EFGH 1447 * ====== 1448 * (GH * CD) + (GH * AB) << 16 + (EF * CD) << 16 + (EF * AB) << 32 1449 * 1450 * In GLSL, (a * b) becomes 1451 * 1452 * uint m1 = (a & 0x0000ffffu) * (b & 0x0000ffffu); 1453 * uint m2 = (a & 0x0000ffffu) * (b >> 16); 1454 * uint m3 = (a >> 16) * (b & 0x0000ffffu); 1455 * uint m4 = (a >> 16) * (b >> 16); 1456 * 1457 * uint c1; 1458 * uint c2; 1459 * uint lo_result; 1460 * uint hi_result; 1461 * 1462 * lo_result = uaddCarry(m1, m2 << 16, c1); 1463 * hi_result = m4 + c1; 1464 * lo_result = uaddCarry(lo_result, m3 << 16, c2); 1465 * hi_result = hi_result + c2; 1466 * hi_result = hi_result + (m2 >> 16) + (m3 >> 16); 1467 */ 1468 const unsigned elements = ir->operands[0]->type->vector_elements; 1469 ir_variable *src1 = 1470 new(ir) ir_variable(glsl_type::uvec(elements), "src1", ir_var_temporary); 1471 ir_variable *src1h = 1472 new(ir) 
ir_variable(glsl_type::uvec(elements), "src1h", ir_var_temporary); 1473 ir_variable *src1l = 1474 new(ir) ir_variable(glsl_type::uvec(elements), "src1l", ir_var_temporary); 1475 ir_variable *src2 = 1476 new(ir) ir_variable(glsl_type::uvec(elements), "src2", ir_var_temporary); 1477 ir_variable *src2h = 1478 new(ir) ir_variable(glsl_type::uvec(elements), "src2h", ir_var_temporary); 1479 ir_variable *src2l = 1480 new(ir) ir_variable(glsl_type::uvec(elements), "src2l", ir_var_temporary); 1481 ir_variable *t1 = 1482 new(ir) ir_variable(glsl_type::uvec(elements), "t1", ir_var_temporary); 1483 ir_variable *t2 = 1484 new(ir) ir_variable(glsl_type::uvec(elements), "t2", ir_var_temporary); 1485 ir_variable *lo = 1486 new(ir) ir_variable(glsl_type::uvec(elements), "lo", ir_var_temporary); 1487 ir_variable *hi = 1488 new(ir) ir_variable(glsl_type::uvec(elements), "hi", ir_var_temporary); 1489 ir_variable *different_signs = NULL; 1490 ir_constant *c0000FFFF = new(ir) ir_constant(0x0000FFFFu, elements); 1491 ir_constant *c16 = new(ir) ir_constant(16u, elements); 1492 1493 ir_instruction &i = *base_ir; 1494 1495 i.insert_before(src1); 1496 i.insert_before(src2); 1497 i.insert_before(src1h); 1498 i.insert_before(src2h); 1499 i.insert_before(src1l); 1500 i.insert_before(src2l); 1501 1502 if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) { 1503 i.insert_before(assign(src1, ir->operands[0])); 1504 i.insert_before(assign(src2, ir->operands[1])); 1505 } else { 1506 assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); 1507 1508 ir_variable *itmp1 = 1509 new(ir) ir_variable(glsl_type::ivec(elements), "itmp1", ir_var_temporary); 1510 ir_variable *itmp2 = 1511 new(ir) ir_variable(glsl_type::ivec(elements), "itmp2", ir_var_temporary); 1512 ir_constant *c0 = new(ir) ir_constant(int(0), elements); 1513 1514 i.insert_before(itmp1); 1515 i.insert_before(itmp2); 1516 i.insert_before(assign(itmp1, ir->operands[0])); 1517 i.insert_before(assign(itmp2, ir->operands[1])); 1518 1519 
different_signs = 1520 new(ir) ir_variable(glsl_type::bvec(elements), "different_signs", 1521 ir_var_temporary); 1522 1523 i.insert_before(different_signs); 1524 i.insert_before(assign(different_signs, expr(ir_binop_logic_xor, 1525 less(itmp1, c0), 1526 less(itmp2, c0->clone(ir, NULL))))); 1527 1528 i.insert_before(assign(src1, i2u(abs(itmp1)))); 1529 i.insert_before(assign(src2, i2u(abs(itmp2)))); 1530 } 1531 1532 i.insert_before(assign(src1l, bit_and(src1, c0000FFFF))); 1533 i.insert_before(assign(src2l, bit_and(src2, c0000FFFF->clone(ir, NULL)))); 1534 i.insert_before(assign(src1h, rshift(src1, c16))); 1535 i.insert_before(assign(src2h, rshift(src2, c16->clone(ir, NULL)))); 1536 1537 i.insert_before(lo); 1538 i.insert_before(hi); 1539 i.insert_before(t1); 1540 i.insert_before(t2); 1541 1542 i.insert_before(assign(lo, mul(src1l, src2l))); 1543 i.insert_before(assign(t1, mul(src1l, src2h))); 1544 i.insert_before(assign(t2, mul(src1h, src2l))); 1545 i.insert_before(assign(hi, mul(src1h, src2h))); 1546 1547 i.insert_before(assign(hi, add(hi, _carry(lo, lshift(t1, c16->clone(ir, NULL)))))); 1548 i.insert_before(assign(lo, add(lo, lshift(t1, c16->clone(ir, NULL))))); 1549 1550 i.insert_before(assign(hi, add(hi, _carry(lo, lshift(t2, c16->clone(ir, NULL)))))); 1551 i.insert_before(assign(lo, add(lo, lshift(t2, c16->clone(ir, NULL))))); 1552 1553 if (different_signs == NULL) { 1554 assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); 1555 1556 ir->operation = ir_binop_add; 1557 ir->operands[0] = add(hi, rshift(t1, c16->clone(ir, NULL))); 1558 ir->operands[1] = rshift(t2, c16->clone(ir, NULL)); 1559 } else { 1560 assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); 1561 1562 i.insert_before(assign(hi, add(add(hi, rshift(t1, c16->clone(ir, NULL))), 1563 rshift(t2, c16->clone(ir, NULL))))); 1564 1565 /* For channels where different_signs is set we have to perform a 64-bit 1566 * negation. This is *not* the same as just negating the high 32-bits. 
1567 * Consider -3 * 2. The high 32-bits is 0, but the desired result is 1568 * -1, not -0! Recall -x == ~x + 1. 1569 */ 1570 ir_variable *neg_hi = 1571 new(ir) ir_variable(glsl_type::ivec(elements), "neg_hi", ir_var_temporary); 1572 ir_constant *c1 = new(ir) ir_constant(1u, elements); 1573 1574 i.insert_before(neg_hi); 1575 i.insert_before(assign(neg_hi, add(bit_not(u2i(hi)), 1576 u2i(_carry(bit_not(lo), c1))))); 1577 1578 ir->operation = ir_triop_csel; 1579 ir->operands[0] = new(ir) ir_dereference_variable(different_signs); 1580 ir->operands[1] = new(ir) ir_dereference_variable(neg_hi); 1581 ir->operands[2] = u2i(hi); 1582 } 1583 } 1584 1585 ir_visitor_status 1586 lower_instructions_visitor::visit_leave(ir_expression *ir) 1587 { 1588 switch (ir->operation) { 1589 case ir_binop_dot: 1590 if (ir->operands[0]->type->is_double()) 1591 double_dot_to_fma(ir); 1592 break; 1593 case ir_triop_lrp: 1594 if (ir->operands[0]->type->is_double()) 1595 double_lrp(ir); 1596 break; 1597 case ir_binop_sub: 1598 if (lowering(SUB_TO_ADD_NEG)) 1599 sub_to_add_neg(ir); 1600 break; 1601 1602 case ir_binop_div: 1603 if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP)) 1604 int_div_to_mul_rcp(ir); 1605 else if ((ir->operands[1]->type->is_float() && lowering(FDIV_TO_MUL_RCP)) || 1606 (ir->operands[1]->type->is_double() && lowering(DDIV_TO_MUL_RCP))) 1607 div_to_mul_rcp(ir); 1608 break; 1609 1610 case ir_unop_exp: 1611 if (lowering(EXP_TO_EXP2)) 1612 exp_to_exp2(ir); 1613 break; 1614 1615 case ir_unop_log: 1616 if (lowering(LOG_TO_LOG2)) 1617 log_to_log2(ir); 1618 break; 1619 1620 case ir_binop_mod: 1621 if (lowering(MOD_TO_FLOOR) && (ir->type->is_float() || ir->type->is_double())) 1622 mod_to_floor(ir); 1623 break; 1624 1625 case ir_binop_pow: 1626 if (lowering(POW_TO_EXP2)) 1627 pow_to_exp2(ir); 1628 break; 1629 1630 case ir_binop_ldexp: 1631 if (lowering(LDEXP_TO_ARITH) && ir->type->is_float()) 1632 ldexp_to_arith(ir); 1633 if (lowering(DFREXP_DLDEXP_TO_ARITH) && 
ir->type->is_double()) 1634 dldexp_to_arith(ir); 1635 break; 1636 1637 case ir_unop_frexp_exp: 1638 if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double()) 1639 dfrexp_exp_to_arith(ir); 1640 break; 1641 1642 case ir_unop_frexp_sig: 1643 if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double()) 1644 dfrexp_sig_to_arith(ir); 1645 break; 1646 1647 case ir_binop_carry: 1648 if (lowering(CARRY_TO_ARITH)) 1649 carry_to_arith(ir); 1650 break; 1651 1652 case ir_binop_borrow: 1653 if (lowering(BORROW_TO_ARITH)) 1654 borrow_to_arith(ir); 1655 break; 1656 1657 case ir_unop_saturate: 1658 if (lowering(SAT_TO_CLAMP)) 1659 sat_to_clamp(ir); 1660 break; 1661 1662 case ir_unop_trunc: 1663 if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) 1664 dtrunc_to_dfrac(ir); 1665 break; 1666 1667 case ir_unop_ceil: 1668 if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) 1669 dceil_to_dfrac(ir); 1670 break; 1671 1672 case ir_unop_floor: 1673 if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) 1674 dfloor_to_dfrac(ir); 1675 break; 1676 1677 case ir_unop_round_even: 1678 if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) 1679 dround_even_to_dfrac(ir); 1680 break; 1681 1682 case ir_unop_sign: 1683 if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) 1684 dsign_to_csel(ir); 1685 break; 1686 1687 case ir_unop_bit_count: 1688 if (lowering(BIT_COUNT_TO_MATH)) 1689 bit_count_to_math(ir); 1690 break; 1691 1692 case ir_triop_bitfield_extract: 1693 if (lowering(EXTRACT_TO_SHIFTS)) 1694 extract_to_shifts(ir); 1695 break; 1696 1697 case ir_quadop_bitfield_insert: 1698 if (lowering(INSERT_TO_SHIFTS)) 1699 insert_to_shifts(ir); 1700 break; 1701 1702 case ir_unop_bitfield_reverse: 1703 if (lowering(REVERSE_TO_SHIFTS)) 1704 reverse_to_shifts(ir); 1705 break; 1706 1707 case ir_unop_find_lsb: 1708 if (lowering(FIND_LSB_TO_FLOAT_CAST)) 1709 find_lsb_to_float_cast(ir); 1710 break; 1711 1712 case ir_unop_find_msb: 1713 if (lowering(FIND_MSB_TO_FLOAT_CAST)) 1714 
find_msb_to_float_cast(ir); 1715 break; 1716 1717 case ir_binop_imul_high: 1718 if (lowering(IMUL_HIGH_TO_MUL)) 1719 imul_high_to_mul(ir); 1720 break; 1721 1722 default: 1723 return visit_continue; 1724 } 1725 1726 return visit_continue; 1727 } 1728