1 /* 2 * Copyright 2014 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file opt_minmax.cpp 26 * 27 * Drop operands from an expression tree of only min/max operations if they 28 * can be proven to not contribute to the final result. 29 * 30 * The algorithm is similar to alpha-beta pruning on a minmax search. 31 */ 32 33 #include "ir.h" 34 #include "ir_visitor.h" 35 #include "ir_rvalue_visitor.h" 36 #include "ir_optimization.h" 37 #include "ir_builder.h" 38 #include "program/prog_instruction.h" 39 #include "compiler/glsl_types.h" 40 #include "main/macros.h" 41 42 using namespace ir_builder; 43 44 namespace { 45 46 enum compare_components_result { 47 LESS, 48 LESS_OR_EQUAL, 49 EQUAL, 50 GREATER_OR_EQUAL, 51 GREATER, 52 MIXED 53 }; 54 55 class minmax_range { 56 public: 57 minmax_range(ir_constant *low = NULL, ir_constant *high = NULL) 58 { 59 this->low = low; 60 this->high = high; 61 } 62 63 /* low is the lower limit of the range, high is the higher limit. NULL on 64 * low means negative infinity (unlimited) and on high positive infinity 65 * (unlimited). Because of the two interpretations of the value NULL, 66 * arbitrary comparison between ir_constants is impossible. 67 */ 68 ir_constant *low; 69 ir_constant *high; 70 }; 71 72 class ir_minmax_visitor : public ir_rvalue_enter_visitor { 73 public: 74 ir_minmax_visitor() 75 : progress(false) 76 { 77 } 78 79 ir_rvalue *prune_expression(ir_expression *expr, minmax_range baserange); 80 81 void handle_rvalue(ir_rvalue **rvalue); 82 83 bool progress; 84 }; 85 86 /* 87 * Returns LESS if all vector components of `a' are strictly lower than of `b', 88 * GREATER if all vector components of `a' are strictly greater than of `b', 89 * MIXED if some vector components of `a' are strictly lower than of `b' while 90 * others are strictly greater, or EQUAL otherwise. 91 */ 92 static enum compare_components_result 93 compare_components(ir_constant *a, ir_constant *b) 94 { 95 assert(a != NULL); 96 assert(b != NULL); 97 98 assert(a->type->base_type == b->type->base_type); 99 100 unsigned a_inc = a->type->is_scalar() ? 0 : 1; 101 unsigned b_inc = b->type->is_scalar() ? 0 : 1; 102 unsigned components = MAX2(a->type->components(), b->type->components()); 103 104 bool foundless = false; 105 bool foundgreater = false; 106 bool foundequal = false; 107 108 for (unsigned i = 0, c0 = 0, c1 = 0; 109 i < components; 110 c0 += a_inc, c1 += b_inc, ++i) { 111 switch (a->type->base_type) { 112 case GLSL_TYPE_UINT: 113 if (a->value.u[c0] < b->value.u[c1]) 114 foundless = true; 115 else if (a->value.u[c0] > b->value.u[c1]) 116 foundgreater = true; 117 else 118 foundequal = true; 119 break; 120 case GLSL_TYPE_INT: 121 if (a->value.i[c0] < b->value.i[c1]) 122 foundless = true; 123 else if (a->value.i[c0] > b->value.i[c1]) 124 foundgreater = true; 125 else 126 foundequal = true; 127 break; 128 case GLSL_TYPE_FLOAT: 129 if (a->value.f[c0] < b->value.f[c1]) 130 foundless = true; 131 else if (a->value.f[c0] > b->value.f[c1]) 132 foundgreater = true; 133 else 134 foundequal = true; 135 break; 136 case GLSL_TYPE_DOUBLE: 137 if (a->value.d[c0] < b->value.d[c1]) 138 foundless = true; 139 else if (a->value.d[c0] > b->value.d[c1]) 140 foundgreater = true; 141 else 142 foundequal = true; 143 break; 144 default: 145 unreachable("not reached"); 146 } 147 } 148 149 if (foundless && foundgreater) { 150 /* Some components are strictly lower, others are strictly greater */ 151 return MIXED; 152 } 153 154 if (foundequal) { 155 /* It is not mixed, but it is not strictly lower or greater */ 156 if (foundless) 157 return LESS_OR_EQUAL; 158 if (foundgreater) 159 return GREATER_OR_EQUAL; 160 return EQUAL; 161 } 162 163 /* All components are strictly lower or strictly greater */ 164 return foundless ? LESS : GREATER; 165 } 166 167 static ir_constant * 168 combine_constant(bool ismin, ir_constant *a, ir_constant *b) 169 { 170 void *mem_ctx = ralloc_parent(a); 171 ir_constant *c = a->clone(mem_ctx, NULL); 172 for (unsigned i = 0; i < c->type->components(); i++) { 173 switch (c->type->base_type) { 174 case GLSL_TYPE_UINT: 175 if ((ismin && b->value.u[i] < c->value.u[i]) || 176 (!ismin && b->value.u[i] > c->value.u[i])) 177 c->value.u[i] = b->value.u[i]; 178 break; 179 case GLSL_TYPE_INT: 180 if ((ismin && b->value.i[i] < c->value.i[i]) || 181 (!ismin && b->value.i[i] > c->value.i[i])) 182 c->value.i[i] = b->value.i[i]; 183 break; 184 case GLSL_TYPE_FLOAT: 185 if ((ismin && b->value.f[i] < c->value.f[i]) || 186 (!ismin && b->value.f[i] > c->value.f[i])) 187 c->value.f[i] = b->value.f[i]; 188 break; 189 case GLSL_TYPE_DOUBLE: 190 if ((ismin && b->value.d[i] < c->value.d[i]) || 191 (!ismin && b->value.d[i] > c->value.d[i])) 192 c->value.d[i] = b->value.d[i]; 193 break; 194 default: 195 assert(!"not reached"); 196 } 197 } 198 return c; 199 } 200 201 static ir_constant * 202 smaller_constant(ir_constant *a, ir_constant *b) 203 { 204 assert(a != NULL); 205 assert(b != NULL); 206 207 enum compare_components_result ret = compare_components(a, b); 208 if (ret == MIXED) 209 return combine_constant(true, a, b); 210 else if (ret < EQUAL) 211 return a; 212 else 213 return b; 214 } 215 216 static ir_constant * 217 larger_constant(ir_constant *a, ir_constant *b) 218 { 219 assert(a != NULL); 220 assert(b != NULL); 221 222 enum compare_components_result ret = compare_components(a, b); 223 if (ret == MIXED) 224 return combine_constant(false, a, b); 225 else if (ret < EQUAL) 226 return b; 227 else 228 return a; 229 } 230 231 /* Combines two ranges by doing an element-wise min() / max() depending on the 232 * operation. 233 */ 234 static minmax_range 235 combine_range(minmax_range r0, minmax_range r1, bool ismin) 236 { 237 minmax_range ret; 238 239 if (!r0.low) { 240 ret.low = ismin ? r0.low : r1.low; 241 } else if (!r1.low) { 242 ret.low = ismin ? r1.low : r0.low; 243 } else { 244 ret.low = ismin ? smaller_constant(r0.low, r1.low) : 245 larger_constant(r0.low, r1.low); 246 } 247 248 if (!r0.high) { 249 ret.high = ismin ? r1.high : r0.high; 250 } else if (!r1.high) { 251 ret.high = ismin ? r0.high : r1.high; 252 } else { 253 ret.high = ismin ? smaller_constant(r0.high, r1.high) : 254 larger_constant(r0.high, r1.high); 255 } 256 257 return ret; 258 } 259 260 /* Returns a range so that lower limit is the larger of the two lower limits, 261 * and higher limit is the smaller of the two higher limits. 262 */ 263 static minmax_range 264 range_intersection(minmax_range r0, minmax_range r1) 265 { 266 minmax_range ret; 267 268 if (!r0.low) 269 ret.low = r1.low; 270 else if (!r1.low) 271 ret.low = r0.low; 272 else 273 ret.low = larger_constant(r0.low, r1.low); 274 275 if (!r0.high) 276 ret.high = r1.high; 277 else if (!r1.high) 278 ret.high = r0.high; 279 else 280 ret.high = smaller_constant(r0.high, r1.high); 281 282 return ret; 283 } 284 285 static minmax_range 286 get_range(ir_rvalue *rval) 287 { 288 ir_expression *expr = rval->as_expression(); 289 if (expr && (expr->operation == ir_binop_min || 290 expr->operation == ir_binop_max)) { 291 minmax_range r0 = get_range(expr->operands[0]); 292 minmax_range r1 = get_range(expr->operands[1]); 293 return combine_range(r0, r1, expr->operation == ir_binop_min); 294 } 295 296 ir_constant *c = rval->as_constant(); 297 if (c) { 298 return minmax_range(c, c); 299 } 300 301 return minmax_range(); 302 } 303 304 /** 305 * Prunes a min/max expression considering the base range of the parent 306 * min/max expression. 307 * 308 * @param baserange the range that the parents of this min/max expression 309 * in the min/max tree will clamp its value to. 310 */ 311 ir_rvalue * 312 ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange) 313 { 314 assert(expr->operation == ir_binop_min || 315 expr->operation == ir_binop_max); 316 317 bool ismin = expr->operation == ir_binop_min; 318 minmax_range limits[2]; 319 320 /* Recurse to get the ranges for each of the subtrees of this 321 * expression. We need to do this as a separate step because we need to 322 * know the ranges of each of the subtrees before we prune either one. 323 * Consider something like this: 324 * 325 * max 326 * / \ 327 * max max 328 * / \ / \ 329 * 3 a b 2 330 * 331 * We would like to prune away the max on the bottom-right, but to do so 332 * we need to know the range of the expression on the left beforehand, 333 * and there's no guarantee that we will visit either subtree in a 334 * particular order. 335 */ 336 for (unsigned i = 0; i < 2; ++i) 337 limits[i] = get_range(expr->operands[i]); 338 339 for (unsigned i = 0; i < 2; ++i) { 340 bool is_redundant = false; 341 342 enum compare_components_result cr = LESS; 343 if (ismin) { 344 /* If this operand will always be greater than the other one, it's 345 * redundant. 346 */ 347 if (limits[i].low && limits[1 - i].high) { 348 cr = compare_components(limits[i].low, limits[1 - i].high); 349 if (cr >= EQUAL && cr != MIXED) 350 is_redundant = true; 351 } 352 /* If this operand is always greater than baserange, then even if 353 * it's smaller than the other one it'll get clamped, so it's 354 * redundant. 355 */ 356 if (!is_redundant && limits[i].low && baserange.high) { 357 cr = compare_components(limits[i].low, baserange.high); 358 if (cr > EQUAL && cr != MIXED) 359 is_redundant = true; 360 } 361 } else { 362 /* If this operand will always be lower than the other one, it's 363 * redundant. 364 */ 365 if (limits[i].high && limits[1 - i].low) { 366 cr = compare_components(limits[i].high, limits[1 - i].low); 367 if (cr <= EQUAL) 368 is_redundant = true; 369 } 370 /* If this operand is always lower than baserange, then even if 371 * it's greater than the other one it'll get clamped, so it's 372 * redundant. 373 */ 374 if (!is_redundant && limits[i].high && baserange.low) { 375 cr = compare_components(limits[i].high, baserange.low); 376 if (cr < EQUAL) 377 is_redundant = true; 378 } 379 } 380 381 if (is_redundant) { 382 progress = true; 383 384 /* Recurse if necessary. */ 385 ir_expression *op_expr = expr->operands[1 - i]->as_expression(); 386 if (op_expr && (op_expr->operation == ir_binop_min || 387 op_expr->operation == ir_binop_max)) { 388 return prune_expression(op_expr, baserange); 389 } 390 391 return expr->operands[1 - i]; 392 } else if (cr == MIXED) { 393 /* If we have mixed vector operands, we can try to resolve the minmax 394 * expression by doing a component-wise minmax: 395 * 396 * min min 397 * / \ / \ 398 * min a ===> [1,1] a 399 * / \ 400 * [1,3] [3,1] 401 * 402 */ 403 ir_constant *a = expr->operands[0]->as_constant(); 404 ir_constant *b = expr->operands[1]->as_constant(); 405 if (a && b) 406 return combine_constant(ismin, a, b); 407 } 408 } 409 410 /* Now recurse to operands giving them the proper baserange. The baserange 411 * to pass is the intersection of our baserange and the other operand's 412 * limit with one of the ranges unlimited. If we can't compute a valid 413 * intersection, we use the current baserange. 414 */ 415 for (unsigned i = 0; i < 2; ++i) { 416 ir_expression *op_expr = expr->operands[i]->as_expression(); 417 if (op_expr && (op_expr->operation == ir_binop_min || 418 op_expr->operation == ir_binop_max)) { 419 /* We can only compute a new baserange for this operand if we managed 420 * to compute a valid range for the other operand. 421 */ 422 if (ismin) 423 limits[1 - i].low = NULL; 424 else 425 limits[1 - i].high = NULL; 426 minmax_range base = range_intersection(limits[1 - i], baserange); 427 expr->operands[i] = prune_expression(op_expr, base); 428 } 429 } 430 431 /* If we got here we could not discard any of the operands of the minmax 432 * expression, but we can still try to resolve the expression if both 433 * operands are constant. We do this after the loop above, to make sure 434 * that if our operands are minmax expressions we have tried to prune them 435 * first (hopefully reducing them to constants). 436 */ 437 ir_constant *a = expr->operands[0]->as_constant(); 438 ir_constant *b = expr->operands[1]->as_constant(); 439 if (a && b) 440 return combine_constant(ismin, a, b); 441 442 return expr; 443 } 444 445 static ir_rvalue * 446 swizzle_if_required(ir_expression *expr, ir_rvalue *rval) 447 { 448 if (expr->type->is_vector() && rval->type->is_scalar()) { 449 return swizzle(rval, SWIZZLE_XXXX, expr->type->vector_elements); 450 } else { 451 return rval; 452 } 453 } 454 455 void 456 ir_minmax_visitor::handle_rvalue(ir_rvalue **rvalue) 457 { 458 if (!*rvalue) 459 return; 460 461 ir_expression *expr = (*rvalue)->as_expression(); 462 if (!expr || (expr->operation != ir_binop_min && 463 expr->operation != ir_binop_max)) 464 return; 465 466 ir_rvalue *new_rvalue = prune_expression(expr, minmax_range()); 467 if (new_rvalue == *rvalue) 468 return; 469 470 /* If the expression type is a vector and the optimization leaves a scalar 471 * as the result, we need to turn it into a vector. 472 */ 473 *rvalue = swizzle_if_required(expr, new_rvalue); 474 475 progress = true; 476 } 477 478 } 479 480 bool 481 do_minmax_prune(exec_list *instructions) 482 { 483 ir_minmax_visitor v; 484 485 visit_list_elements(&v, instructions); 486 487 return v.progress; 488 } 489