Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright (c) 2015 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file lower_shared_reference.cpp
     26  *
     27  * IR lower pass to replace dereferences of compute shader shared variables
     28  * with intrinsic function calls.
     29  *
     30  * This relieves drivers of the responsibility of allocating space for the
     31  * shared variables in the shared memory region.
     32  */
     33 
     34 #include "lower_buffer_access.h"
     35 #include "ir_builder.h"
     36 #include "linker.h"
     37 #include "main/macros.h"
     38 #include "util/list.h"
     39 #include "glsl_parser_extras.h"
     40 
     41 using namespace ir_builder;
     42 
     43 namespace {
     44 
     45 struct var_offset {
     46    struct list_head node;
     47    const ir_variable *var;
     48    unsigned offset;
     49 };
     50 
     51 class lower_shared_reference_visitor :
     52       public lower_buffer_access::lower_buffer_access {
     53 public:
     54 
     55    lower_shared_reference_visitor(struct gl_linked_shader *shader)
     56       : list_ctx(ralloc_context(NULL)), shader(shader), shared_size(0u)
     57    {
     58       list_inithead(&var_offsets);
     59    }
     60 
     61    ~lower_shared_reference_visitor()
     62    {
     63       ralloc_free(list_ctx);
     64    }
     65 
     66    enum {
     67       shared_load_access,
     68       shared_store_access,
     69       shared_atomic_access,
     70    } buffer_access_type;
     71 
     72    void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
     73                              const glsl_type *type, ir_rvalue *offset,
     74                              unsigned mask, int channel);
     75 
     76    void handle_rvalue(ir_rvalue **rvalue);
     77    ir_visitor_status visit_enter(ir_assignment *ir);
     78    void handle_assignment(ir_assignment *ir);
     79 
     80    ir_call *lower_shared_atomic_intrinsic(ir_call *ir);
     81    ir_call *check_for_shared_atomic_intrinsic(ir_call *ir);
     82    ir_visitor_status visit_enter(ir_call *ir);
     83 
     84    unsigned get_shared_offset(const ir_variable *);
     85 
     86    ir_call *shared_load(void *mem_ctx, const struct glsl_type *type,
     87                         ir_rvalue *offset);
     88    ir_call *shared_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
     89                          unsigned write_mask);
     90 
     91    void *list_ctx;
     92    struct gl_linked_shader *shader;
     93    struct list_head var_offsets;
     94    unsigned shared_size;
     95    bool progress;
     96 };
     97 
     98 unsigned
     99 lower_shared_reference_visitor::get_shared_offset(const ir_variable *var)
    100 {
    101    list_for_each_entry(var_offset, var_entry, &var_offsets, node) {
    102       if (var_entry->var == var)
    103          return var_entry->offset;
    104    }
    105 
    106    struct var_offset *new_entry = rzalloc(list_ctx, struct var_offset);
    107    list_add(&new_entry->node, &var_offsets);
    108    new_entry->var = var;
    109 
    110    unsigned var_align = var->type->std430_base_alignment(false);
    111    new_entry->offset = glsl_align(shared_size, var_align);
    112 
    113    unsigned var_size = var->type->std430_size(false);
    114    shared_size = new_entry->offset + var_size;
    115 
    116    return new_entry->offset;
    117 }
    118 
    119 void
    120 lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
    121 {
    122    if (!*rvalue)
    123       return;
    124 
    125    ir_dereference *deref = (*rvalue)->as_dereference();
    126    if (!deref)
    127       return;
    128 
    129    ir_variable *var = deref->variable_referenced();
    130    if (!var || var->data.mode != ir_var_shader_shared)
    131       return;
    132 
    133    buffer_access_type = shared_load_access;
    134 
    135    void *mem_ctx = ralloc_parent(shader->ir);
    136 
    137    ir_rvalue *offset = NULL;
    138    unsigned const_offset = get_shared_offset(var);
    139    bool row_major;
    140    const glsl_type *matrix_type;
    141    assert(var->get_interface_type() == NULL);
    142    const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;
    143 
    144    setup_buffer_access(mem_ctx, deref,
    145                        &offset, &const_offset,
    146                        &row_major, &matrix_type, NULL, packing);
    147 
    148    /* Now that we've calculated the offset to the start of the
    149     * dereference, walk over the type and emit loads into a temporary.
    150     */
    151    const glsl_type *type = (*rvalue)->type;
    152    ir_variable *load_var = new(mem_ctx) ir_variable(type,
    153                                                     "shared_load_temp",
    154                                                     ir_var_temporary);
    155    base_ir->insert_before(load_var);
    156 
    157    ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
    158                                                        "shared_load_temp_offset",
    159                                                        ir_var_temporary);
    160    base_ir->insert_before(load_offset);
    161    base_ir->insert_before(assign(load_offset, offset));
    162 
    163    deref = new(mem_ctx) ir_dereference_variable(load_var);
    164 
    165    emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major,
    166                matrix_type, packing, 0);
    167 
    168    *rvalue = deref;
    169 
    170    progress = true;
    171 }
    172 
    173 void
    174 lower_shared_reference_visitor::handle_assignment(ir_assignment *ir)
    175 {
    176    if (!ir || !ir->lhs)
    177       return;
    178 
    179    ir_rvalue *rvalue = ir->lhs->as_rvalue();
    180    if (!rvalue)
    181       return;
    182 
    183    ir_dereference *deref = ir->lhs->as_dereference();
    184    if (!deref)
    185       return;
    186 
    187    ir_variable *var = ir->lhs->variable_referenced();
    188    if (!var || var->data.mode != ir_var_shader_shared)
    189       return;
    190 
    191    buffer_access_type = shared_store_access;
    192 
    193    /* We have a write to a shared variable, so declare a temporary and rewrite
    194     * the assignment so that the temporary is the LHS.
    195     */
    196    void *mem_ctx = ralloc_parent(shader->ir);
    197 
    198    const glsl_type *type = rvalue->type;
    199    ir_variable *store_var = new(mem_ctx) ir_variable(type,
    200                                                      "shared_store_temp",
    201                                                      ir_var_temporary);
    202    base_ir->insert_before(store_var);
    203    ir->lhs = new(mem_ctx) ir_dereference_variable(store_var);
    204 
    205    ir_rvalue *offset = NULL;
    206    unsigned const_offset = get_shared_offset(var);
    207    bool row_major;
    208    const glsl_type *matrix_type;
    209    assert(var->get_interface_type() == NULL);
    210    const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;
    211 
    212    setup_buffer_access(mem_ctx, deref,
    213                        &offset, &const_offset,
    214                        &row_major, &matrix_type, NULL, packing);
    215 
    216    deref = new(mem_ctx) ir_dereference_variable(store_var);
    217 
    218    ir_variable *store_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
    219                                                         "shared_store_temp_offset",
    220                                                         ir_var_temporary);
    221    base_ir->insert_before(store_offset);
    222    base_ir->insert_before(assign(store_offset, offset));
    223 
    224    /* Now we have to write the value assigned to the temporary back to memory */
    225    emit_access(mem_ctx, true, deref, store_offset, const_offset, row_major,
    226                matrix_type, packing, ir->write_mask);
    227 
    228    progress = true;
    229 }
    230 
    231 ir_visitor_status
    232 lower_shared_reference_visitor::visit_enter(ir_assignment *ir)
    233 {
    234    handle_assignment(ir);
    235    return rvalue_visit(ir);
    236 }
    237 
    238 void
    239 lower_shared_reference_visitor::insert_buffer_access(void *mem_ctx,
    240                                                      ir_dereference *deref,
    241                                                      const glsl_type *type,
    242                                                      ir_rvalue *offset,
    243                                                      unsigned mask,
    244                                                      int /* channel */)
    245 {
    246    if (buffer_access_type == shared_store_access) {
    247       ir_call *store = shared_store(mem_ctx, deref, offset, mask);
    248       base_ir->insert_after(store);
    249    } else {
    250       ir_call *load = shared_load(mem_ctx, type, offset);
    251       base_ir->insert_before(load);
    252       ir_rvalue *value = load->return_deref->as_rvalue()->clone(mem_ctx, NULL);
    253       base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
    254                                     value));
    255    }
    256 }
    257 
    258 static bool
    259 compute_shader_enabled(const _mesa_glsl_parse_state *state)
    260 {
    261    return state->stage == MESA_SHADER_COMPUTE;
    262 }
    263 
    264 ir_call *
    265 lower_shared_reference_visitor::shared_store(void *mem_ctx,
    266                                              ir_rvalue *deref,
    267                                              ir_rvalue *offset,
    268                                              unsigned write_mask)
    269 {
    270    exec_list sig_params;
    271 
    272    ir_variable *offset_ref = new(mem_ctx)
    273       ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
    274    sig_params.push_tail(offset_ref);
    275 
    276    ir_variable *val_ref = new(mem_ctx)
    277       ir_variable(deref->type, "value" , ir_var_function_in);
    278    sig_params.push_tail(val_ref);
    279 
    280    ir_variable *writemask_ref = new(mem_ctx)
    281       ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
    282    sig_params.push_tail(writemask_ref);
    283 
    284    ir_function_signature *sig = new(mem_ctx)
    285       ir_function_signature(glsl_type::void_type, compute_shader_enabled);
    286    assert(sig);
    287    sig->replace_parameters(&sig_params);
    288    sig->intrinsic_id = ir_intrinsic_shared_store;
    289 
    290    ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_shared");
    291    f->add_signature(sig);
    292 
    293    exec_list call_params;
    294    call_params.push_tail(offset->clone(mem_ctx, NULL));
    295    call_params.push_tail(deref->clone(mem_ctx, NULL));
    296    call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
    297    return new(mem_ctx) ir_call(sig, NULL, &call_params);
    298 }
    299 
    300 ir_call *
    301 lower_shared_reference_visitor::shared_load(void *mem_ctx,
    302                                             const struct glsl_type *type,
    303                                             ir_rvalue *offset)
    304 {
    305    exec_list sig_params;
    306 
    307    ir_variable *offset_ref = new(mem_ctx)
    308       ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
    309    sig_params.push_tail(offset_ref);
    310 
    311    ir_function_signature *sig =
    312       new(mem_ctx) ir_function_signature(type, compute_shader_enabled);
    313    assert(sig);
    314    sig->replace_parameters(&sig_params);
    315    sig->intrinsic_id = ir_intrinsic_shared_load;
    316 
    317    ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_shared");
    318    f->add_signature(sig);
    319 
    320    ir_variable *result = new(mem_ctx)
    321       ir_variable(type, "shared_load_result", ir_var_temporary);
    322    base_ir->insert_before(result);
    323    ir_dereference_variable *deref_result = new(mem_ctx)
    324       ir_dereference_variable(result);
    325 
    326    exec_list call_params;
    327    call_params.push_tail(offset->clone(mem_ctx, NULL));
    328 
    329    return new(mem_ctx) ir_call(sig, deref_result, &call_params);
    330 }
    331 
    332 /* Lowers the intrinsic call to a new internal intrinsic that swaps the access
    333  * to the shared variable in the first parameter by an offset. This involves
    334  * creating the new internal intrinsic (i.e. the new function signature).
    335  */
    336 ir_call *
    337 lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir)
    338 {
    339    /* Shared atomics usually have 2 parameters, the shared variable and an
    340     * integer argument. The exception is CompSwap, that has an additional
    341     * integer parameter.
    342     */
    343    int param_count = ir->actual_parameters.length();
    344    assert(param_count == 2 || param_count == 3);
    345 
    346    /* First argument must be a scalar integer shared variable */
    347    exec_node *param = ir->actual_parameters.get_head();
    348    ir_instruction *inst = (ir_instruction *) param;
    349    assert(inst->ir_type == ir_type_dereference_variable ||
    350           inst->ir_type == ir_type_dereference_array ||
    351           inst->ir_type == ir_type_dereference_record ||
    352           inst->ir_type == ir_type_swizzle);
    353 
    354    ir_rvalue *deref = (ir_rvalue *) inst;
    355    assert(deref->type->is_scalar() && deref->type->is_integer());
    356 
    357    ir_variable *var = deref->variable_referenced();
    358    assert(var);
    359 
    360    /* Compute the offset to the start if the dereference
    361     */
    362    void *mem_ctx = ralloc_parent(shader->ir);
    363 
    364    ir_rvalue *offset = NULL;
    365    unsigned const_offset = get_shared_offset(var);
    366    bool row_major;
    367    const glsl_type *matrix_type;
    368    assert(var->get_interface_type() == NULL);
    369    const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;
    370    buffer_access_type = shared_atomic_access;
    371 
    372    setup_buffer_access(mem_ctx, deref,
    373                        &offset, &const_offset,
    374                        &row_major, &matrix_type, NULL, packing);
    375 
    376    assert(offset);
    377    assert(!row_major);
    378    assert(matrix_type == NULL);
    379 
    380    ir_rvalue *deref_offset =
    381       add(offset, new(mem_ctx) ir_constant(const_offset));
    382 
    383    /* Create the new internal function signature that will take an offset
    384     * instead of a shared variable
    385     */
    386    exec_list sig_params;
    387    ir_variable *sig_param = new(mem_ctx)
    388       ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
    389    sig_params.push_tail(sig_param);
    390 
    391    const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
    392       glsl_type::int_type : glsl_type::uint_type;
    393    sig_param = new(mem_ctx)
    394          ir_variable(type, "data1", ir_var_function_in);
    395    sig_params.push_tail(sig_param);
    396 
    397    if (param_count == 3) {
    398       sig_param = new(mem_ctx)
    399             ir_variable(type, "data2", ir_var_function_in);
    400       sig_params.push_tail(sig_param);
    401    }
    402 
    403    ir_function_signature *sig =
    404       new(mem_ctx) ir_function_signature(deref->type,
    405                                          compute_shader_enabled);
    406    assert(sig);
    407    sig->replace_parameters(&sig_params);
    408 
    409    assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load);
    410    assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap);
    411    sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, shared);
    412 
    413    char func_name[64];
    414    sprintf(func_name, "%s_shared", ir->callee_name());
    415    ir_function *f = new(mem_ctx) ir_function(func_name);
    416    f->add_signature(sig);
    417 
    418    /* Now, create the call to the internal intrinsic */
    419    exec_list call_params;
    420    call_params.push_tail(deref_offset);
    421    param = ir->actual_parameters.get_head()->get_next();
    422    ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
    423    call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
    424    if (param_count == 3) {
    425       param = param->get_next();
    426       param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
    427       call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
    428    }
    429    ir_dereference_variable *return_deref =
    430       ir->return_deref->clone(mem_ctx, NULL);
    431    return new(mem_ctx) ir_call(sig, return_deref, &call_params);
    432 }
    433 
    434 ir_call *
    435 lower_shared_reference_visitor::check_for_shared_atomic_intrinsic(ir_call *ir)
    436 {
    437    exec_list& params = ir->actual_parameters;
    438 
    439    if (params.length() < 2 || params.length() > 3)
    440       return ir;
    441 
    442    ir_rvalue *rvalue =
    443       ((ir_instruction *) params.get_head())->as_rvalue();
    444    if (!rvalue)
    445       return ir;
    446 
    447    ir_variable *var = rvalue->variable_referenced();
    448    if (!var || var->data.mode != ir_var_shader_shared)
    449       return ir;
    450 
    451    const enum ir_intrinsic_id id = ir->callee->intrinsic_id;
    452    if (id == ir_intrinsic_generic_atomic_add ||
    453        id == ir_intrinsic_generic_atomic_min ||
    454        id == ir_intrinsic_generic_atomic_max ||
    455        id == ir_intrinsic_generic_atomic_and ||
    456        id == ir_intrinsic_generic_atomic_or ||
    457        id == ir_intrinsic_generic_atomic_xor ||
    458        id == ir_intrinsic_generic_atomic_exchange ||
    459        id == ir_intrinsic_generic_atomic_comp_swap) {
    460       return lower_shared_atomic_intrinsic(ir);
    461    }
    462 
    463    return ir;
    464 }
    465 
    466 ir_visitor_status
    467 lower_shared_reference_visitor::visit_enter(ir_call *ir)
    468 {
    469    ir_call *new_ir = check_for_shared_atomic_intrinsic(ir);
    470    if (new_ir != ir) {
    471       progress = true;
    472       base_ir->replace_with(new_ir);
    473       return visit_continue_with_parent;
    474    }
    475 
    476    return rvalue_visit(ir);
    477 }
    478 
    479 } /* unnamed namespace */
    480 
    481 void
    482 lower_shared_reference(struct gl_context *ctx,
    483                        struct gl_shader_program *prog,
    484                        struct gl_linked_shader *shader)
    485 {
    486    if (shader->Stage != MESA_SHADER_COMPUTE)
    487       return;
    488 
    489    lower_shared_reference_visitor v(shader);
    490 
    491    /* Loop over the instructions lowering references, because we take a deref
    492     * of an shared variable array using a shared variable dereference as the
    493     * index will produce a collection of instructions all of which have cloned
    494     * shared variable dereferences for that array index.
    495     */
    496    do {
    497       v.progress = false;
    498       visit_list_elements(&v, shader->ir);
    499    } while (v.progress);
    500 
    501    prog->Comp.SharedSize = v.shared_size;
    502 
    503    /* Section 19.1 (Compute Shader Variables) of the OpenGL 4.5 (Core Profile)
    504     * specification says:
    505     *
    506     *   "There is a limit to the total size of all variables declared as
    507     *    shared in a single program object. This limit, expressed in units of
    508     *    basic machine units, may be queried as the value of
    509     *    MAX_COMPUTE_SHARED_MEMORY_SIZE."
    510     */
    511    if (prog->Comp.SharedSize > ctx->Const.MaxComputeSharedMemorySize) {
    512       linker_error(prog, "Too much shared memory used (%u/%u)\n",
    513                    prog->Comp.SharedSize,
    514                    ctx->Const.MaxComputeSharedMemorySize);
    515    }
    516 }
    517