Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright (c) 2015 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file lower_shared_reference.cpp
     26  *
     27  * IR lower pass to replace dereferences of compute shader shared variables
     28  * with intrinsic function calls.
     29  *
     30  * This relieves drivers of the responsibility of allocating space for the
     31  * shared variables in the shared memory region.
     32  */
     33 
     34 #include "lower_buffer_access.h"
     35 #include "ir_builder.h"
     36 #include "main/macros.h"
     37 #include "util/list.h"
     38 #include "glsl_parser_extras.h"
     39 
     40 using namespace ir_builder;
     41 
     42 namespace {
     43 
     44 struct var_offset {
     45    struct list_head node;
     46    const ir_variable *var;
     47    unsigned offset;
     48 };
     49 
     50 class lower_shared_reference_visitor :
     51       public lower_buffer_access::lower_buffer_access {
     52 public:
     53 
     54    lower_shared_reference_visitor(struct gl_linked_shader *shader)
     55       : list_ctx(ralloc_context(NULL)), shader(shader), shared_size(0u)
     56    {
     57       list_inithead(&var_offsets);
     58    }
     59 
     60    ~lower_shared_reference_visitor()
     61    {
     62       ralloc_free(list_ctx);
     63    }
     64 
     65    enum {
     66       shared_load_access,
     67       shared_store_access,
     68       shared_atomic_access,
     69    } buffer_access_type;
     70 
     71    void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
     72                              const glsl_type *type, ir_rvalue *offset,
     73                              unsigned mask, int channel);
     74 
     75    void handle_rvalue(ir_rvalue **rvalue);
     76    ir_visitor_status visit_enter(ir_assignment *ir);
     77    void handle_assignment(ir_assignment *ir);
     78 
     79    ir_call *lower_shared_atomic_intrinsic(ir_call *ir);
     80    ir_call *check_for_shared_atomic_intrinsic(ir_call *ir);
     81    ir_visitor_status visit_enter(ir_call *ir);
     82 
     83    unsigned get_shared_offset(const ir_variable *);
     84 
     85    ir_call *shared_load(void *mem_ctx, const struct glsl_type *type,
     86                         ir_rvalue *offset);
     87    ir_call *shared_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
     88                          unsigned write_mask);
     89 
     90    void *list_ctx;
     91    struct gl_linked_shader *shader;
     92    struct list_head var_offsets;
     93    unsigned shared_size;
     94    bool progress;
     95 };
     96 
     97 unsigned
     98 lower_shared_reference_visitor::get_shared_offset(const ir_variable *var)
     99 {
    100    list_for_each_entry(var_offset, var_entry, &var_offsets, node) {
    101       if (var_entry->var == var)
    102          return var_entry->offset;
    103    }
    104 
    105    struct var_offset *new_entry = rzalloc(list_ctx, struct var_offset);
    106    list_add(&new_entry->node, &var_offsets);
    107    new_entry->var = var;
    108 
    109    unsigned var_align = var->type->std430_base_alignment(false);
    110    new_entry->offset = glsl_align(shared_size, var_align);
    111 
    112    unsigned var_size = var->type->std430_size(false);
    113    shared_size = new_entry->offset + var_size;
    114 
    115    return new_entry->offset;
    116 }
    117 
    118 void
    119 lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
    120 {
    121    if (!*rvalue)
    122       return;
    123 
    124    ir_dereference *deref = (*rvalue)->as_dereference();
    125    if (!deref)
    126       return;
    127 
    128    ir_variable *var = deref->variable_referenced();
    129    if (!var || var->data.mode != ir_var_shader_shared)
    130       return;
    131 
    132    buffer_access_type = shared_load_access;
    133 
    134    void *mem_ctx = ralloc_parent(shader->ir);
    135 
    136    ir_rvalue *offset = NULL;
    137    unsigned const_offset = get_shared_offset(var);
    138    bool row_major;
    139    int matrix_columns;
    140    assert(var->get_interface_type() == NULL);
    141    const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;
    142 
    143    setup_buffer_access(mem_ctx, deref,
    144                        &offset, &const_offset,
    145                        &row_major, &matrix_columns, NULL, packing);
    146 
    147    /* Now that we've calculated the offset to the start of the
    148     * dereference, walk over the type and emit loads into a temporary.
    149     */
    150    const glsl_type *type = (*rvalue)->type;
    151    ir_variable *load_var = new(mem_ctx) ir_variable(type,
    152                                                     "shared_load_temp",
    153                                                     ir_var_temporary);
    154    base_ir->insert_before(load_var);
    155 
    156    ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
    157                                                        "shared_load_temp_offset",
    158                                                        ir_var_temporary);
    159    base_ir->insert_before(load_offset);
    160    base_ir->insert_before(assign(load_offset, offset));
    161 
    162    deref = new(mem_ctx) ir_dereference_variable(load_var);
    163 
    164    emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major,
    165                matrix_columns, packing, 0);
    166 
    167    *rvalue = deref;
    168 
    169    progress = true;
    170 }
    171 
    172 void
    173 lower_shared_reference_visitor::handle_assignment(ir_assignment *ir)
    174 {
    175    if (!ir || !ir->lhs)
    176       return;
    177 
    178    ir_rvalue *rvalue = ir->lhs->as_rvalue();
    179    if (!rvalue)
    180       return;
    181 
    182    ir_dereference *deref = ir->lhs->as_dereference();
    183    if (!deref)
    184       return;
    185 
    186    ir_variable *var = ir->lhs->variable_referenced();
    187    if (!var || var->data.mode != ir_var_shader_shared)
    188       return;
    189 
    190    buffer_access_type = shared_store_access;
    191 
    192    /* We have a write to a shared variable, so declare a temporary and rewrite
    193     * the assignment so that the temporary is the LHS.
    194     */
    195    void *mem_ctx = ralloc_parent(shader->ir);
    196 
    197    const glsl_type *type = rvalue->type;
    198    ir_variable *store_var = new(mem_ctx) ir_variable(type,
    199                                                      "shared_store_temp",
    200                                                      ir_var_temporary);
    201    base_ir->insert_before(store_var);
    202    ir->lhs = new(mem_ctx) ir_dereference_variable(store_var);
    203 
    204    ir_rvalue *offset = NULL;
    205    unsigned const_offset = get_shared_offset(var);
    206    bool row_major;
    207    int matrix_columns;
    208    assert(var->get_interface_type() == NULL);
    209    const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;
    210 
    211    setup_buffer_access(mem_ctx, deref,
    212                        &offset, &const_offset,
    213                        &row_major, &matrix_columns, NULL, packing);
    214 
    215    deref = new(mem_ctx) ir_dereference_variable(store_var);
    216 
    217    ir_variable *store_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
    218                                                         "shared_store_temp_offset",
    219                                                         ir_var_temporary);
    220    base_ir->insert_before(store_offset);
    221    base_ir->insert_before(assign(store_offset, offset));
    222 
    223    /* Now we have to write the value assigned to the temporary back to memory */
    224    emit_access(mem_ctx, true, deref, store_offset, const_offset, row_major,
    225                matrix_columns, packing, ir->write_mask);
    226 
    227    progress = true;
    228 }
    229 
    230 ir_visitor_status
    231 lower_shared_reference_visitor::visit_enter(ir_assignment *ir)
    232 {
    233    handle_assignment(ir);
    234    return rvalue_visit(ir);
    235 }
    236 
    237 void
    238 lower_shared_reference_visitor::insert_buffer_access(void *mem_ctx,
    239                                                      ir_dereference *deref,
    240                                                      const glsl_type *type,
    241                                                      ir_rvalue *offset,
    242                                                      unsigned mask,
    243                                                      int channel)
    244 {
    245    if (buffer_access_type == shared_store_access) {
    246       ir_call *store = shared_store(mem_ctx, deref, offset, mask);
    247       base_ir->insert_after(store);
    248    } else {
    249       ir_call *load = shared_load(mem_ctx, type, offset);
    250       base_ir->insert_before(load);
    251       ir_rvalue *value = load->return_deref->as_rvalue()->clone(mem_ctx, NULL);
    252       base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
    253                                     value));
    254    }
    255 }
    256 
    257 static bool
    258 compute_shader_enabled(const _mesa_glsl_parse_state *state)
    259 {
    260    return state->stage == MESA_SHADER_COMPUTE;
    261 }
    262 
    263 ir_call *
    264 lower_shared_reference_visitor::shared_store(void *mem_ctx,
    265                                              ir_rvalue *deref,
    266                                              ir_rvalue *offset,
    267                                              unsigned write_mask)
    268 {
    269    exec_list sig_params;
    270 
    271    ir_variable *offset_ref = new(mem_ctx)
    272       ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
    273    sig_params.push_tail(offset_ref);
    274 
    275    ir_variable *val_ref = new(mem_ctx)
    276       ir_variable(deref->type, "value" , ir_var_function_in);
    277    sig_params.push_tail(val_ref);
    278 
    279    ir_variable *writemask_ref = new(mem_ctx)
    280       ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
    281    sig_params.push_tail(writemask_ref);
    282 
    283    ir_function_signature *sig = new(mem_ctx)
    284       ir_function_signature(glsl_type::void_type, compute_shader_enabled);
    285    assert(sig);
    286    sig->replace_parameters(&sig_params);
    287    sig->intrinsic_id = ir_intrinsic_shared_store;
    288 
    289    ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_shared");
    290    f->add_signature(sig);
    291 
    292    exec_list call_params;
    293    call_params.push_tail(offset->clone(mem_ctx, NULL));
    294    call_params.push_tail(deref->clone(mem_ctx, NULL));
    295    call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
    296    return new(mem_ctx) ir_call(sig, NULL, &call_params);
    297 }
    298 
    299 ir_call *
    300 lower_shared_reference_visitor::shared_load(void *mem_ctx,
    301                                             const struct glsl_type *type,
    302                                             ir_rvalue *offset)
    303 {
    304    exec_list sig_params;
    305 
    306    ir_variable *offset_ref = new(mem_ctx)
    307       ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
    308    sig_params.push_tail(offset_ref);
    309 
    310    ir_function_signature *sig =
    311       new(mem_ctx) ir_function_signature(type, compute_shader_enabled);
    312    assert(sig);
    313    sig->replace_parameters(&sig_params);
    314    sig->intrinsic_id = ir_intrinsic_shared_load;
    315 
    316    ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_shared");
    317    f->add_signature(sig);
    318 
    319    ir_variable *result = new(mem_ctx)
    320       ir_variable(type, "shared_load_result", ir_var_temporary);
    321    base_ir->insert_before(result);
    322    ir_dereference_variable *deref_result = new(mem_ctx)
    323       ir_dereference_variable(result);
    324 
    325    exec_list call_params;
    326    call_params.push_tail(offset->clone(mem_ctx, NULL));
    327 
    328    return new(mem_ctx) ir_call(sig, deref_result, &call_params);
    329 }
    330 
    331 /* Lowers the intrinsic call to a new internal intrinsic that swaps the access
    332  * to the shared variable in the first parameter by an offset. This involves
    333  * creating the new internal intrinsic (i.e. the new function signature).
    334  */
    335 ir_call *
    336 lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir)
    337 {
    338    /* Shared atomics usually have 2 parameters, the shared variable and an
    339     * integer argument. The exception is CompSwap, that has an additional
    340     * integer parameter.
    341     */
    342    int param_count = ir->actual_parameters.length();
    343    assert(param_count == 2 || param_count == 3);
    344 
    345    /* First argument must be a scalar integer shared variable */
    346    exec_node *param = ir->actual_parameters.get_head();
    347    ir_instruction *inst = (ir_instruction *) param;
    348    assert(inst->ir_type == ir_type_dereference_variable ||
    349           inst->ir_type == ir_type_dereference_array ||
    350           inst->ir_type == ir_type_dereference_record ||
    351           inst->ir_type == ir_type_swizzle);
    352 
    353    ir_rvalue *deref = (ir_rvalue *) inst;
    354    assert(deref->type->is_scalar() && deref->type->is_integer());
    355 
    356    ir_variable *var = deref->variable_referenced();
    357    assert(var);
    358 
    359    /* Compute the offset to the start if the dereference
    360     */
    361    void *mem_ctx = ralloc_parent(shader->ir);
    362 
    363    ir_rvalue *offset = NULL;
    364    unsigned const_offset = get_shared_offset(var);
    365    bool row_major;
    366    int matrix_columns;
    367    assert(var->get_interface_type() == NULL);
    368    const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;
    369    buffer_access_type = shared_atomic_access;
    370 
    371    setup_buffer_access(mem_ctx, deref,
    372                        &offset, &const_offset,
    373                        &row_major, &matrix_columns, NULL, packing);
    374 
    375    assert(offset);
    376    assert(!row_major);
    377    assert(matrix_columns == 1);
    378 
    379    ir_rvalue *deref_offset =
    380       add(offset, new(mem_ctx) ir_constant(const_offset));
    381 
    382    /* Create the new internal function signature that will take an offset
    383     * instead of a shared variable
    384     */
    385    exec_list sig_params;
    386    ir_variable *sig_param = new(mem_ctx)
    387       ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
    388    sig_params.push_tail(sig_param);
    389 
    390    const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
    391       glsl_type::int_type : glsl_type::uint_type;
    392    sig_param = new(mem_ctx)
    393          ir_variable(type, "data1", ir_var_function_in);
    394    sig_params.push_tail(sig_param);
    395 
    396    if (param_count == 3) {
    397       sig_param = new(mem_ctx)
    398             ir_variable(type, "data2", ir_var_function_in);
    399       sig_params.push_tail(sig_param);
    400    }
    401 
    402    ir_function_signature *sig =
    403       new(mem_ctx) ir_function_signature(deref->type,
    404                                          compute_shader_enabled);
    405    assert(sig);
    406    sig->replace_parameters(&sig_params);
    407 
    408    assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load);
    409    assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap);
    410    sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, shared);
    411 
    412    char func_name[64];
    413    sprintf(func_name, "%s_shared", ir->callee_name());
    414    ir_function *f = new(mem_ctx) ir_function(func_name);
    415    f->add_signature(sig);
    416 
    417    /* Now, create the call to the internal intrinsic */
    418    exec_list call_params;
    419    call_params.push_tail(deref_offset);
    420    param = ir->actual_parameters.get_head()->get_next();
    421    ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
    422    call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
    423    if (param_count == 3) {
    424       param = param->get_next();
    425       param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
    426       call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
    427    }
    428    ir_dereference_variable *return_deref =
    429       ir->return_deref->clone(mem_ctx, NULL);
    430    return new(mem_ctx) ir_call(sig, return_deref, &call_params);
    431 }
    432 
    433 ir_call *
    434 lower_shared_reference_visitor::check_for_shared_atomic_intrinsic(ir_call *ir)
    435 {
    436    exec_list& params = ir->actual_parameters;
    437 
    438    if (params.length() < 2 || params.length() > 3)
    439       return ir;
    440 
    441    ir_rvalue *rvalue =
    442       ((ir_instruction *) params.get_head())->as_rvalue();
    443    if (!rvalue)
    444       return ir;
    445 
    446    ir_variable *var = rvalue->variable_referenced();
    447    if (!var || var->data.mode != ir_var_shader_shared)
    448       return ir;
    449 
    450    const enum ir_intrinsic_id id = ir->callee->intrinsic_id;
    451    if (id == ir_intrinsic_generic_atomic_add ||
    452        id == ir_intrinsic_generic_atomic_min ||
    453        id == ir_intrinsic_generic_atomic_max ||
    454        id == ir_intrinsic_generic_atomic_and ||
    455        id == ir_intrinsic_generic_atomic_or ||
    456        id == ir_intrinsic_generic_atomic_xor ||
    457        id == ir_intrinsic_generic_atomic_exchange ||
    458        id == ir_intrinsic_generic_atomic_comp_swap) {
    459       return lower_shared_atomic_intrinsic(ir);
    460    }
    461 
    462    return ir;
    463 }
    464 
    465 ir_visitor_status
    466 lower_shared_reference_visitor::visit_enter(ir_call *ir)
    467 {
    468    ir_call *new_ir = check_for_shared_atomic_intrinsic(ir);
    469    if (new_ir != ir) {
    470       progress = true;
    471       base_ir->replace_with(new_ir);
    472       return visit_continue_with_parent;
    473    }
    474 
    475    return rvalue_visit(ir);
    476 }
    477 
    478 } /* unnamed namespace */
    479 
    480 void
    481 lower_shared_reference(struct gl_linked_shader *shader, unsigned *shared_size)
    482 {
    483    if (shader->Stage != MESA_SHADER_COMPUTE)
    484       return;
    485 
    486    lower_shared_reference_visitor v(shader);
    487 
    488    /* Loop over the instructions lowering references, because we take a deref
    489     * of an shared variable array using a shared variable dereference as the
    490     * index will produce a collection of instructions all of which have cloned
    491     * shared variable dereferences for that array index.
    492     */
    493    do {
    494       v.progress = false;
    495       visit_list_elements(&v, shader->ir);
    496    } while (v.progress);
    497 
    498    *shared_size = v.shared_size;
    499 }
    500