Home | History | Annotate | Download | only in i965
      1 /*
      2  * Copyright © 2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file brw_wm_channel_expressions.cpp
     26  *
     27  * Breaks vector operations down into operations on each component.
     28  *
     29  * The 965 fragment shader receives 8 or 16 pixels at a time, so each
     30  * channel of a vector is laid out as 1 or 2 8-float registers.  Each
     31  * ALU operation operates on one of those channel registers.  As a
     32  * result, there is no value to the 965 fragment shader in tracking
     33  * "vector" expressions in the sense of GLSL fragment shaders, when
     34  * doing a channel at a time may help in constant folding, algebraic
     35  * simplification, and reducing the liveness of channel registers.
     36  *
     37  * The exception to the desire to break everything down to floats is
     38  * texturing.  The texture sampler returns a writemasked
     39  * 4/8-register sequence containing the texture values.  We don't want
     40  * to dispatch to the sampler separately for each channel we need, so
     41  * we do retain the vector types in that case.
     42  */
     43 
     44 extern "C" {
     45 #include "main/core.h"
     46 #include "brw_wm.h"
     47 }
     48 #include "glsl/ir.h"
     49 #include "glsl/ir_expression_flattening.h"
     50 #include "glsl/glsl_types.h"
     51 
     52 class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
     53 public:
     54    ir_channel_expressions_visitor()
     55    {
     56       this->progress = false;
     57       this->mem_ctx = NULL;
     58    }
     59 
     60    ir_visitor_status visit_leave(ir_assignment *);
     61 
     62    ir_rvalue *get_element(ir_variable *var, unsigned int element);
     63    void assign(ir_assignment *ir, int elem, ir_rvalue *val);
     64 
     65    bool progress;
     66    void *mem_ctx;
     67 };
     68 
     69 static bool
     70 channel_expressions_predicate(ir_instruction *ir)
     71 {
     72    ir_expression *expr = ir->as_expression();
     73    unsigned int i;
     74 
     75    if (!expr)
     76       return false;
     77 
     78    for (i = 0; i < expr->get_num_operands(); i++) {
     79       if (expr->operands[i]->type->is_vector())
     80 	 return true;
     81    }
     82 
     83    return false;
     84 }
     85 
     86 bool
     87 brw_do_channel_expressions(exec_list *instructions)
     88 {
     89    ir_channel_expressions_visitor v;
     90 
     91    /* Pull out any matrix expression to a separate assignment to a
     92     * temp.  This will make our handling of the breakdown to
     93     * operations on the matrix's vector components much easier.
     94     */
     95    do_expression_flattening(instructions, channel_expressions_predicate);
     96 
     97    visit_list_elements(&v, instructions);
     98 
     99    return v.progress;
    100 }
    101 
    102 ir_rvalue *
    103 ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
    104 {
    105    ir_dereference *deref;
    106 
    107    if (var->type->is_scalar())
    108       return new(mem_ctx) ir_dereference_variable(var);
    109 
    110    assert(elem < var->type->components());
    111    deref = new(mem_ctx) ir_dereference_variable(var);
    112    return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
    113 }
    114 
    115 void
    116 ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
    117 {
    118    ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
    119    ir_assignment *assign;
    120 
    121    /* This assign-of-expression should have been generated by the
    122     * expression flattening visitor (since we never short circit to
    123     * not flatten, even for plain assignments of variables), so the
    124     * writemask is always full.
    125     */
    126    assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);
    127 
    128    assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
    129    ir->insert_before(assign);
    130 }
    131 
    132 ir_visitor_status
    133 ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
    134 {
    135    ir_expression *expr = ir->rhs->as_expression();
    136    bool found_vector = false;
    137    unsigned int i, vector_elements = 1;
    138    ir_variable *op_var[2];
    139 
    140    if (!expr)
    141       return visit_continue;
    142 
    143    if (!this->mem_ctx)
    144       this->mem_ctx = ralloc_parent(ir);
    145 
    146    for (i = 0; i < expr->get_num_operands(); i++) {
    147       if (expr->operands[i]->type->is_vector()) {
    148 	 found_vector = true;
    149 	 vector_elements = expr->operands[i]->type->vector_elements;
    150 	 break;
    151       }
    152    }
    153    if (!found_vector)
    154       return visit_continue;
    155 
    156    /* Store the expression operands in temps so we can use them
    157     * multiple times.
    158     */
    159    for (i = 0; i < expr->get_num_operands(); i++) {
    160       ir_assignment *assign;
    161       ir_dereference *deref;
    162 
    163       assert(!expr->operands[i]->type->is_matrix());
    164 
    165       op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
    166 					   "channel_expressions",
    167 					   ir_var_temporary);
    168       ir->insert_before(op_var[i]);
    169 
    170       deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
    171       assign = new(mem_ctx) ir_assignment(deref,
    172 					  expr->operands[i],
    173 					  NULL);
    174       ir->insert_before(assign);
    175    }
    176 
    177    const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
    178 							   1, 1);
    179 
    180    /* OK, time to break down this vector operation. */
    181    switch (expr->operation) {
    182    case ir_unop_bit_not:
    183    case ir_unop_logic_not:
    184    case ir_unop_neg:
    185    case ir_unop_abs:
    186    case ir_unop_sign:
    187    case ir_unop_rcp:
    188    case ir_unop_rsq:
    189    case ir_unop_sqrt:
    190    case ir_unop_exp:
    191    case ir_unop_log:
    192    case ir_unop_exp2:
    193    case ir_unop_log2:
    194    case ir_unop_bitcast_i2f:
    195    case ir_unop_bitcast_f2i:
    196    case ir_unop_bitcast_f2u:
    197    case ir_unop_bitcast_u2f:
    198    case ir_unop_i2u:
    199    case ir_unop_u2i:
    200    case ir_unop_f2i:
    201    case ir_unop_f2u:
    202    case ir_unop_i2f:
    203    case ir_unop_f2b:
    204    case ir_unop_b2f:
    205    case ir_unop_i2b:
    206    case ir_unop_b2i:
    207    case ir_unop_u2f:
    208    case ir_unop_trunc:
    209    case ir_unop_ceil:
    210    case ir_unop_floor:
    211    case ir_unop_fract:
    212    case ir_unop_round_even:
    213    case ir_unop_sin:
    214    case ir_unop_cos:
    215    case ir_unop_sin_reduced:
    216    case ir_unop_cos_reduced:
    217    case ir_unop_dFdx:
    218    case ir_unop_dFdy:
    219       for (i = 0; i < vector_elements; i++) {
    220 	 ir_rvalue *op0 = get_element(op_var[0], i);
    221 
    222 	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
    223 						  element_type,
    224 						  op0,
    225 						  NULL));
    226       }
    227       break;
    228 
    229    case ir_binop_add:
    230    case ir_binop_sub:
    231    case ir_binop_mul:
    232    case ir_binop_div:
    233    case ir_binop_mod:
    234    case ir_binop_min:
    235    case ir_binop_max:
    236    case ir_binop_pow:
    237    case ir_binop_lshift:
    238    case ir_binop_rshift:
    239    case ir_binop_bit_and:
    240    case ir_binop_bit_xor:
    241    case ir_binop_bit_or:
    242    case ir_binop_less:
    243    case ir_binop_greater:
    244    case ir_binop_lequal:
    245    case ir_binop_gequal:
    246    case ir_binop_equal:
    247    case ir_binop_nequal:
    248       for (i = 0; i < vector_elements; i++) {
    249 	 ir_rvalue *op0 = get_element(op_var[0], i);
    250 	 ir_rvalue *op1 = get_element(op_var[1], i);
    251 
    252 	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
    253 						  element_type,
    254 						  op0,
    255 						  op1));
    256       }
    257       break;
    258 
    259    case ir_unop_any: {
    260       ir_expression *temp;
    261       temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
    262 					element_type,
    263 					get_element(op_var[0], 0),
    264 					get_element(op_var[0], 1));
    265 
    266       for (i = 2; i < vector_elements; i++) {
    267 	 temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
    268 					   element_type,
    269 					   get_element(op_var[0], i),
    270 					   temp);
    271       }
    272       assign(ir, 0, temp);
    273       break;
    274    }
    275 
    276    case ir_binop_dot: {
    277       ir_expression *last = NULL;
    278       for (i = 0; i < vector_elements; i++) {
    279 	 ir_rvalue *op0 = get_element(op_var[0], i);
    280 	 ir_rvalue *op1 = get_element(op_var[1], i);
    281 	 ir_expression *temp;
    282 
    283 	 temp = new(mem_ctx) ir_expression(ir_binop_mul,
    284 					   element_type,
    285 					   op0,
    286 					   op1);
    287 	 if (last) {
    288 	    last = new(mem_ctx) ir_expression(ir_binop_add,
    289 					      element_type,
    290 					      temp,
    291 					      last);
    292 	 } else {
    293 	    last = temp;
    294 	 }
    295       }
    296       assign(ir, 0, last);
    297       break;
    298    }
    299 
    300    case ir_binop_logic_and:
    301    case ir_binop_logic_xor:
    302    case ir_binop_logic_or:
    303       ir->print();
    304       printf("\n");
    305       assert(!"not reached: expression operates on scalars only");
    306       break;
    307    case ir_binop_all_equal:
    308    case ir_binop_any_nequal: {
    309       ir_expression *last = NULL;
    310       for (i = 0; i < vector_elements; i++) {
    311 	 ir_rvalue *op0 = get_element(op_var[0], i);
    312 	 ir_rvalue *op1 = get_element(op_var[1], i);
    313 	 ir_expression *temp;
    314 	 ir_expression_operation join;
    315 
    316 	 if (expr->operation == ir_binop_all_equal)
    317 	    join = ir_binop_logic_and;
    318 	 else
    319 	    join = ir_binop_logic_or;
    320 
    321 	 temp = new(mem_ctx) ir_expression(expr->operation,
    322 					   element_type,
    323 					   op0,
    324 					   op1);
    325 	 if (last) {
    326 	    last = new(mem_ctx) ir_expression(join,
    327 					      element_type,
    328 					      temp,
    329 					      last);
    330 	 } else {
    331 	    last = temp;
    332 	 }
    333       }
    334       assign(ir, 0, last);
    335       break;
    336    }
    337    case ir_unop_noise:
    338       assert(!"noise should have been broken down to function call");
    339       break;
    340 
    341    case ir_binop_ubo_load:
    342       assert(!"not yet supported");
    343       break;
    344 
    345    case ir_quadop_vector:
    346       assert(!"should have been lowered");
    347       break;
    348    }
    349 
    350    ir->remove();
    351    this->progress = true;
    352 
    353    return visit_continue;
    354 }
    355