Home | History | Annotate | Download | only in i965
      1 /*
      2  * Copyright  2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file brw_wm_channel_expressions.cpp
     26  *
     27  * Breaks vector operations down into operations on each component.
     28  *
     29  * The 965 fragment shader receives 8 or 16 pixels at a time, so each
     30  * channel of a vector is laid out as 1 or 2 8-float registers.  Each
     31  * ALU operation operates on one of those channel registers.  As a
     32  * result, there is no value to the 965 fragment shader in tracking
     33  * "vector" expressions in the sense of GLSL fragment shaders, when
     34  * doing a channel at a time may help in constant folding, algebraic
     35  * simplification, and reducing the liveness of channel registers.
     36  *
     37  * The exception to the desire to break everything down to floats is
     38  * texturing.  The texture sampler returns a writemasked
     39  * 4/8-register sequence containing the texture values.  We don't want
     40  * to dispatch to the sampler separately for each channel we need, so
     41  * we do retain the vector types in that case.
     42  */
     43 
     44 #include "compiler/glsl/ir.h"
     45 #include "compiler/glsl/ir_expression_flattening.h"
     46 #include "compiler/glsl_types.h"
     47 
     48 class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
     49 public:
     50    ir_channel_expressions_visitor()
     51    {
     52       this->progress = false;
     53       this->mem_ctx = NULL;
     54    }
     55 
     56    ir_visitor_status visit_leave(ir_assignment *);
     57 
     58    ir_rvalue *get_element(ir_variable *var, unsigned int element);
     59    void assign(ir_assignment *ir, int elem, ir_rvalue *val);
     60 
     61    bool progress;
     62    void *mem_ctx;
     63 };
     64 
     65 static bool
     66 channel_expressions_predicate(ir_instruction *ir)
     67 {
     68    ir_expression *expr = ir->as_expression();
     69    unsigned int i;
     70 
     71    if (!expr)
     72       return false;
     73 
     74    switch (expr->operation) {
     75       case ir_unop_pack_half_2x16:
     76       case ir_unop_pack_snorm_2x16:
     77       case ir_unop_pack_snorm_4x8:
     78       case ir_unop_pack_unorm_2x16:
     79       case ir_unop_pack_unorm_4x8:
     80          return false;
     81 
     82       /* these opcodes need to act on the whole vector,
     83        * just like texturing.
     84        */
     85       case ir_unop_interpolate_at_centroid:
     86       case ir_binop_interpolate_at_offset:
     87       case ir_binop_interpolate_at_sample:
     88       case ir_unop_pack_double_2x32:
     89          return false;
     90       default:
     91          break;
     92    }
     93 
     94    for (i = 0; i < expr->get_num_operands(); i++) {
     95       if (expr->operands[i]->type->is_vector())
     96 	 return true;
     97    }
     98 
     99    return false;
    100 }
    101 
    102 bool
    103 brw_do_channel_expressions(exec_list *instructions)
    104 {
    105    ir_channel_expressions_visitor v;
    106 
    107    /* Pull out any matrix expression to a separate assignment to a
    108     * temp.  This will make our handling of the breakdown to
    109     * operations on the matrix's vector components much easier.
    110     */
    111    do_expression_flattening(instructions, channel_expressions_predicate);
    112 
    113    visit_list_elements(&v, instructions);
    114 
    115    return v.progress;
    116 }
    117 
    118 ir_rvalue *
    119 ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
    120 {
    121    ir_dereference *deref;
    122 
    123    if (var->type->is_scalar())
    124       return new(mem_ctx) ir_dereference_variable(var);
    125 
    126    assert(elem < var->type->components());
    127    deref = new(mem_ctx) ir_dereference_variable(var);
    128    return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
    129 }
    130 
    131 void
    132 ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
    133 {
    134    ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
    135    ir_assignment *assign;
    136 
    137    /* This assign-of-expression should have been generated by the
    138     * expression flattening visitor (since we never short circit to
    139     * not flatten, even for plain assignments of variables), so the
    140     * writemask is always full.
    141     */
    142    assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);
    143 
    144    assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
    145    ir->insert_before(assign);
    146 }
    147 
    148 ir_visitor_status
    149 ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
    150 {
    151    ir_expression *expr = ir->rhs->as_expression();
    152    bool found_vector = false;
    153    unsigned int i, vector_elements = 1;
    154    ir_variable *op_var[4];
    155 
    156    if (!expr)
    157       return visit_continue;
    158 
    159    if (!this->mem_ctx)
    160       this->mem_ctx = ralloc_parent(ir);
    161 
    162    for (i = 0; i < expr->get_num_operands(); i++) {
    163       if (expr->operands[i]->type->is_vector()) {
    164 	 found_vector = true;
    165 	 vector_elements = expr->operands[i]->type->vector_elements;
    166 	 break;
    167       }
    168    }
    169    if (!found_vector)
    170       return visit_continue;
    171 
    172    switch (expr->operation) {
    173       case ir_unop_pack_half_2x16:
    174       case ir_unop_pack_snorm_2x16:
    175       case ir_unop_pack_snorm_4x8:
    176       case ir_unop_pack_unorm_2x16:
    177       case ir_unop_pack_unorm_4x8:
    178       case ir_unop_interpolate_at_centroid:
    179       case ir_binop_interpolate_at_offset:
    180       case ir_binop_interpolate_at_sample:
    181       /* We scalarize these in NIR, so no need to do it here */
    182       case ir_unop_pack_double_2x32:
    183          return visit_continue;
    184 
    185       default:
    186          break;
    187    }
    188 
    189    /* Store the expression operands in temps so we can use them
    190     * multiple times.
    191     */
    192    for (i = 0; i < expr->get_num_operands(); i++) {
    193       ir_assignment *assign;
    194       ir_dereference *deref;
    195 
    196       assert(!expr->operands[i]->type->is_matrix());
    197 
    198       op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
    199 					   "channel_expressions",
    200 					   ir_var_temporary);
    201       ir->insert_before(op_var[i]);
    202 
    203       deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
    204       assign = new(mem_ctx) ir_assignment(deref,
    205 					  expr->operands[i],
    206 					  NULL);
    207       ir->insert_before(assign);
    208    }
    209 
    210    const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
    211 							   1, 1);
    212 
    213    /* OK, time to break down this vector operation. */
    214    switch (expr->operation) {
    215    case ir_unop_bit_not:
    216    case ir_unop_logic_not:
    217    case ir_unop_neg:
    218    case ir_unop_abs:
    219    case ir_unop_sign:
    220    case ir_unop_rcp:
    221    case ir_unop_rsq:
    222    case ir_unop_sqrt:
    223    case ir_unop_exp:
    224    case ir_unop_log:
    225    case ir_unop_exp2:
    226    case ir_unop_log2:
    227    case ir_unop_bitcast_i2f:
    228    case ir_unop_bitcast_f2i:
    229    case ir_unop_bitcast_f2u:
    230    case ir_unop_bitcast_u2f:
    231    case ir_unop_i2u:
    232    case ir_unop_u2i:
    233    case ir_unop_f2i:
    234    case ir_unop_f2u:
    235    case ir_unop_i2f:
    236    case ir_unop_f2b:
    237    case ir_unop_b2f:
    238    case ir_unop_i2b:
    239    case ir_unop_b2i:
    240    case ir_unop_u2f:
    241    case ir_unop_d2f:
    242    case ir_unop_f2d:
    243    case ir_unop_d2i:
    244    case ir_unop_i2d:
    245    case ir_unop_d2u:
    246    case ir_unop_u2d:
    247    case ir_unop_d2b:
    248    case ir_unop_trunc:
    249    case ir_unop_ceil:
    250    case ir_unop_floor:
    251    case ir_unop_fract:
    252    case ir_unop_round_even:
    253    case ir_unop_sin:
    254    case ir_unop_cos:
    255    case ir_unop_dFdx:
    256    case ir_unop_dFdx_coarse:
    257    case ir_unop_dFdx_fine:
    258    case ir_unop_dFdy:
    259    case ir_unop_dFdy_coarse:
    260    case ir_unop_dFdy_fine:
    261    case ir_unop_bitfield_reverse:
    262    case ir_unop_bit_count:
    263    case ir_unop_find_msb:
    264    case ir_unop_find_lsb:
    265    case ir_unop_saturate:
    266    case ir_unop_subroutine_to_int:
    267       for (i = 0; i < vector_elements; i++) {
    268 	 ir_rvalue *op0 = get_element(op_var[0], i);
    269 
    270 	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
    271 						  element_type,
    272 						  op0,
    273 						  NULL));
    274       }
    275       break;
    276 
    277    case ir_binop_add:
    278    case ir_binop_sub:
    279    case ir_binop_mul:
    280    case ir_binop_imul_high:
    281    case ir_binop_div:
    282    case ir_binop_carry:
    283    case ir_binop_borrow:
    284    case ir_binop_mod:
    285    case ir_binop_min:
    286    case ir_binop_max:
    287    case ir_binop_pow:
    288    case ir_binop_lshift:
    289    case ir_binop_rshift:
    290    case ir_binop_bit_and:
    291    case ir_binop_bit_xor:
    292    case ir_binop_bit_or:
    293    case ir_binop_logic_and:
    294    case ir_binop_logic_xor:
    295    case ir_binop_logic_or:
    296    case ir_binop_less:
    297    case ir_binop_greater:
    298    case ir_binop_lequal:
    299    case ir_binop_gequal:
    300    case ir_binop_equal:
    301    case ir_binop_nequal:
    302    case ir_binop_ldexp:
    303       for (i = 0; i < vector_elements; i++) {
    304 	 ir_rvalue *op0 = get_element(op_var[0], i);
    305 	 ir_rvalue *op1 = get_element(op_var[1], i);
    306 
    307 	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
    308 						  element_type,
    309 						  op0,
    310 						  op1));
    311       }
    312       break;
    313 
    314    case ir_binop_dot: {
    315       ir_expression *last = NULL;
    316       for (i = 0; i < vector_elements; i++) {
    317 	 ir_rvalue *op0 = get_element(op_var[0], i);
    318 	 ir_rvalue *op1 = get_element(op_var[1], i);
    319 	 ir_expression *temp;
    320 
    321 	 temp = new(mem_ctx) ir_expression(ir_binop_mul,
    322 					   element_type,
    323 					   op0,
    324 					   op1);
    325 	 if (last) {
    326 	    last = new(mem_ctx) ir_expression(ir_binop_add,
    327 					      element_type,
    328 					      temp,
    329 					      last);
    330 	 } else {
    331 	    last = temp;
    332 	 }
    333       }
    334       assign(ir, 0, last);
    335       break;
    336    }
    337 
    338    case ir_binop_all_equal:
    339    case ir_binop_any_nequal: {
    340       ir_expression *last = NULL;
    341       for (i = 0; i < vector_elements; i++) {
    342 	 ir_rvalue *op0 = get_element(op_var[0], i);
    343 	 ir_rvalue *op1 = get_element(op_var[1], i);
    344 	 ir_expression *temp;
    345 	 ir_expression_operation join;
    346 
    347 	 if (expr->operation == ir_binop_all_equal)
    348 	    join = ir_binop_logic_and;
    349 	 else
    350 	    join = ir_binop_logic_or;
    351 
    352 	 temp = new(mem_ctx) ir_expression(expr->operation,
    353 					   element_type,
    354 					   op0,
    355 					   op1);
    356 	 if (last) {
    357 	    last = new(mem_ctx) ir_expression(join,
    358 					      element_type,
    359 					      temp,
    360 					      last);
    361 	 } else {
    362 	    last = temp;
    363 	 }
    364       }
    365       assign(ir, 0, last);
    366       break;
    367    }
    368    case ir_unop_noise:
    369       unreachable("noise should have been broken down to function call");
    370 
    371    case ir_binop_ubo_load:
    372    case ir_unop_get_buffer_size:
    373       unreachable("not yet supported");
    374 
    375    case ir_triop_fma:
    376    case ir_triop_lrp:
    377    case ir_triop_csel:
    378    case ir_triop_bitfield_extract:
    379       for (i = 0; i < vector_elements; i++) {
    380 	 ir_rvalue *op0 = get_element(op_var[0], i);
    381 	 ir_rvalue *op1 = get_element(op_var[1], i);
    382 	 ir_rvalue *op2 = get_element(op_var[2], i);
    383 
    384 	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
    385 						  element_type,
    386 						  op0,
    387 						  op1,
    388 						  op2));
    389       }
    390       break;
    391 
    392    case ir_quadop_bitfield_insert:
    393       for (i = 0; i < vector_elements; i++) {
    394          ir_rvalue *op0 = get_element(op_var[0], i);
    395          ir_rvalue *op1 = get_element(op_var[1], i);
    396          ir_rvalue *op2 = get_element(op_var[2], i);
    397          ir_rvalue *op3 = get_element(op_var[3], i);
    398 
    399          assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
    400                                                   element_type,
    401                                                   op0,
    402                                                   op1,
    403                                                   op2,
    404                                                   op3));
    405       }
    406       break;
    407 
    408    case ir_unop_pack_snorm_2x16:
    409    case ir_unop_pack_snorm_4x8:
    410    case ir_unop_pack_unorm_2x16:
    411    case ir_unop_pack_unorm_4x8:
    412    case ir_unop_pack_half_2x16:
    413    case ir_unop_unpack_snorm_2x16:
    414    case ir_unop_unpack_snorm_4x8:
    415    case ir_unop_unpack_unorm_2x16:
    416    case ir_unop_unpack_unorm_4x8:
    417    case ir_unop_unpack_half_2x16:
    418    case ir_binop_vector_extract:
    419    case ir_triop_vector_insert:
    420    case ir_quadop_vector:
    421    case ir_unop_ssbo_unsized_array_length:
    422       unreachable("should have been lowered");
    423 
    424    case ir_unop_interpolate_at_centroid:
    425    case ir_binop_interpolate_at_offset:
    426    case ir_binop_interpolate_at_sample:
    427    case ir_unop_unpack_double_2x32:
    428       unreachable("not reached: expression operates on scalars only");
    429 
    430    case ir_unop_pack_double_2x32:
    431       unreachable("not reached: to be lowered in NIR, should've been skipped");
    432 
    433    case ir_unop_frexp_sig:
    434    case ir_unop_frexp_exp:
    435       unreachable("should have been lowered by lower_instructions");
    436 
    437    case ir_unop_vote_any:
    438    case ir_unop_vote_all:
    439    case ir_unop_vote_eq:
    440       unreachable("unsupported");
    441    }
    442 
    443    ir->remove();
    444    this->progress = true;
    445 
    446    return visit_continue;
    447 }
    448