/*
 * Copyright 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file brw_wm_channel_expressions.cpp
 *
 * Breaks vector operations down into operations on each component.
 *
 * The 965 fragment shader receives 8 or 16 pixels at a time, so each
 * channel of a vector is laid out as 1 or 2 8-float registers.  Each
 * ALU operation operates on one of those channel registers.  As a
 * result, there is no value to the 965 fragment shader in tracking
 * "vector" expressions in the sense of GLSL fragment shaders, when
 * doing a channel at a time may help in constant folding, algebraic
 * simplification, and reducing the liveness of channel registers.
 *
 * The exception to the desire to break everything down to floats is
 * texturing.  The texture sampler returns a writemasked
 * 4/8-register sequence containing the texture values.  We don't want
 * to dispatch to the sampler separately for each channel we need, so
 * we do retain the vector types in that case.
 */

#include "compiler/glsl/ir.h"
#include "compiler/glsl/ir_expression_flattening.h"
#include "compiler/glsl_types.h"

/**
 * Hierarchical visitor that rewrites each assignment of a vector
 * expression into one scalar assignment per channel.
 *
 * State is lazily initialized: \c mem_ctx is captured from the first
 * instruction visited (via ralloc_parent) so all new IR nodes share the
 * shader's allocation context, and \c progress records whether any
 * assignment was actually scalarized.
 */
class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
public:
   ir_channel_expressions_visitor()
   {
      this->progress = false;
      this->mem_ctx = NULL;
   }

   ir_visitor_status visit_leave(ir_assignment *);

   /** Returns an rvalue reading a single scalar channel of \p var. */
   ir_rvalue *get_element(ir_variable *var, unsigned int element);
   /** Emits, before \p ir, an assignment of \p val to channel \p elem of
    *  \p ir's LHS.
    */
   void assign(ir_assignment *ir, int elem, ir_rvalue *val);

   bool progress;
   void *mem_ctx;
};

/**
 * Predicate for do_expression_flattening(): returns true for expressions
 * with any vector operand, so they get pulled out into an assignment to a
 * temporary that visit_leave() can then scalarize.
 *
 * Expressions that must consume a whole vector at once (the various pack
 * opcodes and interpolateAt*) are excluded — they are either handled as a
 * unit or scalarized later in NIR.
 */
static bool
channel_expressions_predicate(ir_instruction *ir)
{
   ir_expression *expr = ir->as_expression();
   unsigned int i;

   if (!expr)
      return false;

   switch (expr->operation) {
   case ir_unop_pack_half_2x16:
   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_unorm_4x8:
      return false;

   /* these opcodes need to act on the whole vector,
    * just like texturing.
    */
   case ir_unop_interpolate_at_centroid:
   case ir_binop_interpolate_at_offset:
   case ir_binop_interpolate_at_sample:
   case ir_unop_pack_double_2x32:
      return false;
   default:
      break;
   }

   for (i = 0; i < expr->get_num_operands(); i++) {
      if (expr->operands[i]->type->is_vector())
         return true;
   }

   return false;
}

/**
 * Entry point for the pass: scalarizes vector expressions in
 * \p instructions.
 *
 * Returns true if any IR was changed.
 */
bool
brw_do_channel_expressions(exec_list *instructions)
{
   ir_channel_expressions_visitor v;

   /* Pull out any vector expression to a separate assignment to a
    * temp.  This will make our handling of the breakdown to
    * per-channel operations much easier, since every expression we
    * scalarize is then the full RHS of a fully-writemasked assignment.
    */
   do_expression_flattening(instructions, channel_expressions_predicate);

   visit_list_elements(&v, instructions);

   return v.progress;
}

/**
 * Builds an rvalue that reads channel \p elem of \p var.
 *
 * Scalars are dereferenced directly (no swizzle needed); vectors get a
 * single-component swizzle selecting \p elem.
 */
ir_rvalue *
ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
{
   ir_dereference *deref;

   if (var->type->is_scalar())
      return new(mem_ctx) ir_dereference_variable(var);

   assert(elem < var->type->components());
   deref = new(mem_ctx) ir_dereference_variable(var);
   /* ir_swizzle(deref, x, y, z, w, count): count == 1 selects just elem. */
   return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
}

/**
 * Inserts, before \p ir, an assignment of the scalar \p val to channel
 * \p elem of \p ir's LHS (via a single-bit writemask).
 *
 * The original assignment \p ir is left in place; visit_leave() removes it
 * once all channels have been emitted.
 */
void
ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
{
   ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
   ir_assignment *assign;

   /* This assign-of-expression should have been generated by the
    * expression flattening visitor (since we never short circuit to
    * not flatten, even for plain assignments of variables), so the
    * writemask is always full.
    */
   assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);

   assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
   ir->insert_before(assign);
}

/**
 * Rewrites an assignment of a vector expression as one scalar assignment
 * per channel, then removes the original assignment.
 *
 * Operands are first copied into temporaries so each one can be read once
 * per channel without duplicating side effects.  Unary/binary/ternary/
 * quaternary component-wise opcodes map channel i of the result to the
 * same opcode applied to channel i of each operand; reductions (dot,
 * all_equal, any_nequal) fold the per-channel results into channel 0.
 * Opcodes that consume whole vectors (packing, interpolateAt*) are skipped
 * here, and opcodes that prior lowering passes must have eliminated hit
 * unreachable().
 */
ir_visitor_status
ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
{
   ir_expression *expr = ir->rhs->as_expression();
   bool found_vector = false;
   unsigned int i, vector_elements = 1;
   /* 4 == max operands of any ir_expression (ir_quadop_*). */
   ir_variable *op_var[4];

   if (!expr)
      return visit_continue;

   /* Lazily adopt the shader's ralloc context for all IR we create. */
   if (!this->mem_ctx)
      this->mem_ctx = ralloc_parent(ir);

   for (i = 0; i < expr->get_num_operands(); i++) {
      if (expr->operands[i]->type->is_vector()) {
         found_vector = true;
         vector_elements = expr->operands[i]->type->vector_elements;
         break;
      }
   }
   if (!found_vector)
      return visit_continue;

   /* Same skip list as channel_expressions_predicate(): these act on a
    * whole vector at once (or are scalarized later in NIR), so leave
    * them intact.
    */
   switch (expr->operation) {
   case ir_unop_pack_half_2x16:
   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_unorm_4x8:
   case ir_unop_interpolate_at_centroid:
   case ir_binop_interpolate_at_offset:
   case ir_binop_interpolate_at_sample:
   /* We scalarize these in NIR, so no need to do it here */
   case ir_unop_pack_double_2x32:
      return visit_continue;

   default:
      break;
   }

   /* Store the expression operands in temps so we can use them
    * multiple times.
    */
   for (i = 0; i < expr->get_num_operands(); i++) {
      ir_assignment *assign;
      ir_dereference *deref;

      /* Matrices were already flattened to vector operations. */
      assert(!expr->operands[i]->type->is_matrix());

      op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
                                           "channel_expressions",
                                           ir_var_temporary);
      ir->insert_before(op_var[i]);

      deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
      assign = new(mem_ctx) ir_assignment(deref,
                                          expr->operands[i],
                                          NULL);
      ir->insert_before(assign);
   }

   /* Scalar type of each channel result, derived from the LHS (so
    * comparisons correctly produce bool channels).
    */
   const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
                                                           1, 1);

   /* OK, time to break down this vector operation. */
   switch (expr->operation) {
   /* Component-wise unary ops: channel i of the result is the op applied
    * to channel i of operand 0.
    */
   case ir_unop_bit_not:
   case ir_unop_logic_not:
   case ir_unop_neg:
   case ir_unop_abs:
   case ir_unop_sign:
   case ir_unop_rcp:
   case ir_unop_rsq:
   case ir_unop_sqrt:
   case ir_unop_exp:
   case ir_unop_log:
   case ir_unop_exp2:
   case ir_unop_log2:
   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_f2i:
   case ir_unop_bitcast_f2u:
   case ir_unop_bitcast_u2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_f2i:
   case ir_unop_f2u:
   case ir_unop_i2f:
   case ir_unop_f2b:
   case ir_unop_b2f:
   case ir_unop_i2b:
   case ir_unop_b2i:
   case ir_unop_u2f:
   case ir_unop_d2f:
   case ir_unop_f2d:
   case ir_unop_d2i:
   case ir_unop_i2d:
   case ir_unop_d2u:
   case ir_unop_u2d:
   case ir_unop_d2b:
   case ir_unop_trunc:
   case ir_unop_ceil:
   case ir_unop_floor:
   case ir_unop_fract:
   case ir_unop_round_even:
   case ir_unop_sin:
   case ir_unop_cos:
   case ir_unop_dFdx:
   case ir_unop_dFdx_coarse:
   case ir_unop_dFdx_fine:
   case ir_unop_dFdy:
   case ir_unop_dFdy_coarse:
   case ir_unop_dFdy_fine:
   case ir_unop_bitfield_reverse:
   case ir_unop_bit_count:
   case ir_unop_find_msb:
   case ir_unop_find_lsb:
   case ir_unop_saturate:
   case ir_unop_subroutine_to_int:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  NULL));
      }
      break;

   /* Component-wise binary ops. */
   case ir_binop_add:
   case ir_binop_sub:
   case ir_binop_mul:
   case ir_binop_imul_high:
   case ir_binop_div:
   case ir_binop_carry:
   case ir_binop_borrow:
   case ir_binop_mod:
   case ir_binop_min:
   case ir_binop_max:
   case ir_binop_pow:
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
   case ir_binop_logic_and:
   case ir_binop_logic_xor:
   case ir_binop_logic_or:
   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal:
   case ir_binop_ldexp:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  op1));
      }
      break;

   /* dot(a, b): per-channel multiplies folded into a sum, written to
    * channel 0 only (the result is scalar).
    */
   case ir_binop_dot: {
      ir_expression *last = NULL;
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_expression *temp;

         temp = new(mem_ctx) ir_expression(ir_binop_mul,
                                           element_type,
                                           op0,
                                           op1);
         if (last) {
            last = new(mem_ctx) ir_expression(ir_binop_add,
                                              element_type,
                                              temp,
                                              last);
         } else {
            last = temp;
         }
      }
      assign(ir, 0, last);
      break;
   }

   /* all_equal / any_nequal: per-channel comparisons folded with AND/OR
    * respectively into a single bool in channel 0.
    */
   case ir_binop_all_equal:
   case ir_binop_any_nequal: {
      ir_expression *last = NULL;
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_expression *temp;
         ir_expression_operation join;

         if (expr->operation == ir_binop_all_equal)
            join = ir_binop_logic_and;
         else
            join = ir_binop_logic_or;

         temp = new(mem_ctx) ir_expression(expr->operation,
                                           element_type,
                                           op0,
                                           op1);
         if (last) {
            last = new(mem_ctx) ir_expression(join,
                                              element_type,
                                              temp,
                                              last);
         } else {
            last = temp;
         }
      }
      assign(ir, 0, last);
      break;
   }
   case ir_unop_noise:
      unreachable("noise should have been broken down to function call");

   case ir_binop_ubo_load:
   case ir_unop_get_buffer_size:
      unreachable("not yet supported");

   /* Component-wise ternary ops. */
   case ir_triop_fma:
   case ir_triop_lrp:
   case ir_triop_csel:
   case ir_triop_bitfield_extract:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_rvalue *op2 = get_element(op_var[2], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  op1,
                                                  op2));
      }
      break;

   /* Component-wise quaternary op. */
   case ir_quadop_bitfield_insert:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_rvalue *op2 = get_element(op_var[2], i);
         ir_rvalue *op3 = get_element(op_var[3], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  op1,
                                                  op2,
                                                  op3));
      }
      break;

   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_unorm_4x8:
   case ir_unop_pack_half_2x16:
   case ir_unop_unpack_snorm_2x16:
   case ir_unop_unpack_snorm_4x8:
   case ir_unop_unpack_unorm_2x16:
   case ir_unop_unpack_unorm_4x8:
   case ir_unop_unpack_half_2x16:
   case ir_binop_vector_extract:
   case ir_triop_vector_insert:
   case ir_quadop_vector:
   case ir_unop_ssbo_unsized_array_length:
      unreachable("should have been lowered");

   case ir_unop_interpolate_at_centroid:
   case ir_binop_interpolate_at_offset:
   case ir_binop_interpolate_at_sample:
   case ir_unop_unpack_double_2x32:
      unreachable("not reached: expression operates on scalars only");

   case ir_unop_pack_double_2x32:
      unreachable("not reached: to be lowered in NIR, should've been skipped");

   case ir_unop_frexp_sig:
   case ir_unop_frexp_exp:
      unreachable("should have been lowered by lower_instructions");

   case ir_unop_vote_any:
   case ir_unop_vote_all:
   case ir_unop_vote_eq:
      unreachable("unsupported");
   }

   /* All channels emitted; drop the original vector assignment. */
   ir->remove();
   this->progress = true;

   return visit_continue;
}