Home | History | Annotate | Download | only in softpipe
      1 /*
      2  * Copyright 2016 Red Hat.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  */
     23 #include "util/u_inlines.h"
     24 #include "util/u_math.h"
     25 #include "util/u_memory.h"
     26 #include "util/u_pstipple.h"
     27 #include "pipe/p_shader_tokens.h"
     28 #include "draw/draw_context.h"
     29 #include "draw/draw_vertex.h"
     30 #include "sp_context.h"
     31 #include "sp_screen.h"
     32 #include "sp_state.h"
     33 #include "sp_texture.h"
     34 #include "sp_tex_sample.h"
     35 #include "sp_tex_tile_cache.h"
     36 #include "tgsi/tgsi_parse.h"
     37 
     38 static void
     39 cs_prepare(const struct sp_compute_shader *cs,
     40            struct tgsi_exec_machine *machine,
     41            int w, int h, int d,
     42            int g_w, int g_h, int g_d,
     43            int b_w, int b_h, int b_d,
     44            struct tgsi_sampler *sampler,
     45            struct tgsi_image *image,
     46            struct tgsi_buffer *buffer )
     47 {
     48    int j;
     49    /*
     50     * Bind tokens/shader to the interpreter's machine state.
     51     */
     52    tgsi_exec_machine_bind_shader(machine,
     53                                  cs->tokens,
     54                                  sampler, image, buffer);
     55 
     56    if (machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID] != -1) {
     57       unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID];
     58       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
     59          machine->SystemValue[i].xyzw[0].i[j] = w;
     60          machine->SystemValue[i].xyzw[1].i[j] = h;
     61          machine->SystemValue[i].xyzw[2].i[j] = d;
     62       }
     63    }
     64 
     65    if (machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE] != -1) {
     66       unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE];
     67       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
     68          machine->SystemValue[i].xyzw[0].i[j] = g_w;
     69          machine->SystemValue[i].xyzw[1].i[j] = g_h;
     70          machine->SystemValue[i].xyzw[2].i[j] = g_d;
     71       }
     72    }
     73 
     74    if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE] != -1) {
     75       unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE];
     76       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
     77          machine->SystemValue[i].xyzw[0].i[j] = b_w;
     78          machine->SystemValue[i].xyzw[1].i[j] = b_h;
     79          machine->SystemValue[i].xyzw[2].i[j] = b_d;
     80       }
     81    }
     82 }
     83 
     84 static bool
     85 cs_run(const struct sp_compute_shader *cs,
     86        int g_w, int g_h, int g_d,
     87        struct tgsi_exec_machine *machine, bool restart)
     88 {
     89    if (!restart) {
     90       if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID] != -1) {
     91          unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID];
     92          int j;
     93          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
     94             machine->SystemValue[i].xyzw[0].i[j] = g_w;
     95             machine->SystemValue[i].xyzw[1].i[j] = g_h;
     96             machine->SystemValue[i].xyzw[2].i[j] = g_d;
     97          }
     98       }
     99       machine->NonHelperMask = (1 << 1) - 1;
    100    }
    101 
    102    tgsi_exec_machine_run(machine, restart ? machine->pc : 0);
    103 
    104    if (machine->pc != -1)
    105       return true;
    106    return false;
    107 }
    108 
    109 static void
    110 run_workgroup(const struct sp_compute_shader *cs,
    111               int g_w, int g_h, int g_d, int num_threads,
    112               struct tgsi_exec_machine **machines)
    113 {
    114    int i;
    115    bool grp_hit_barrier, restart_threads = false;
    116 
    117    do {
    118       grp_hit_barrier = false;
    119       for (i = 0; i < num_threads; i++) {
    120          grp_hit_barrier |= cs_run(cs, g_w, g_h, g_d, machines[i], restart_threads);
    121       }
    122       restart_threads = false;
    123       if (grp_hit_barrier) {
    124          grp_hit_barrier = false;
    125          restart_threads = true;
    126       }
    127    } while (restart_threads);
    128 }
    129 
    130 static void
    131 cs_delete(const struct sp_compute_shader *cs,
    132           struct tgsi_exec_machine *machine)
    133 {
    134    if (machine->Tokens == cs->tokens) {
    135       tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
    136    }
    137 }
    138 
    139 static void
    140 fill_grid_size(struct pipe_context *context,
    141                const struct pipe_grid_info *info,
    142                uint32_t grid_size[3])
    143 {
    144    struct pipe_transfer *transfer;
    145    uint32_t *params;
    146    if (!info->indirect) {
    147       grid_size[0] = info->grid[0];
    148       grid_size[1] = info->grid[1];
    149       grid_size[2] = info->grid[2];
    150       return;
    151    }
    152    params = pipe_buffer_map_range(context, info->indirect,
    153                                   info->indirect_offset,
    154                                   3 * sizeof(uint32_t),
    155                                   PIPE_TRANSFER_READ,
    156                                   &transfer);
    157 
    158    if (!transfer)
    159       return;
    160 
    161    grid_size[0] = params[0];
    162    grid_size[1] = params[1];
    163    grid_size[2] = params[2];
    164    pipe_buffer_unmap(context, transfer);
    165 }
    166 
    167 void
    168 softpipe_launch_grid(struct pipe_context *context,
    169                      const struct pipe_grid_info *info)
    170 {
    171    struct softpipe_context *softpipe = softpipe_context(context);
    172    struct sp_compute_shader *cs = softpipe->cs;
    173    int num_threads_in_group;
    174    struct tgsi_exec_machine **machines;
    175    int bwidth, bheight, bdepth;
    176    int w, h, d, i;
    177    int g_w, g_h, g_d;
    178    uint32_t grid_size[3];
    179    void *local_mem = NULL;
    180 
    181    softpipe_update_compute_samplers(softpipe);
    182    bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH];
    183    bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT];
    184    bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
    185    num_threads_in_group = bwidth * bheight * bdepth;
    186 
    187    fill_grid_size(context, info, grid_size);
    188 
    189    if (cs->shader.req_local_mem) {
    190       local_mem = CALLOC(1, cs->shader.req_local_mem);
    191    }
    192 
    193    machines = CALLOC(sizeof(struct tgsi_exec_machine *), num_threads_in_group);
    194    if (!machines) {
    195       FREE(local_mem);
    196       return;
    197    }
    198 
    199    /* initialise machines + GRID_SIZE + THREAD_ID  + BLOCK_SIZE */
    200    for (d = 0; d < bdepth; d++) {
    201       for (h = 0; h < bheight; h++) {
    202          for (w = 0; w < bwidth; w++) {
    203             int idx = w + (h * bwidth) + (d * bheight * bwidth);
    204             machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE);
    205 
    206             machines[idx]->LocalMem = local_mem;
    207             machines[idx]->LocalMemSize = cs->shader.req_local_mem;
    208             cs_prepare(cs, machines[idx],
    209                        w, h, d,
    210                        grid_size[0], grid_size[1], grid_size[2],
    211                        bwidth, bheight, bdepth,
    212                        (struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE],
    213                        (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE],
    214                        (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]);
    215             tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS,
    216                                            softpipe->mapped_constants[PIPE_SHADER_COMPUTE],
    217                                            softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]);
    218          }
    219       }
    220    }
    221 
    222    for (g_d = 0; g_d < grid_size[2]; g_d++) {
    223       for (g_h = 0; g_h < grid_size[1]; g_h++) {
    224          for (g_w = 0; g_w < grid_size[0]; g_w++) {
    225             run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines);
    226          }
    227       }
    228    }
    229 
    230    for (i = 0; i < num_threads_in_group; i++) {
    231       cs_delete(cs, machines[i]);
    232       tgsi_exec_machine_destroy(machines[i]);
    233    }
    234 
    235    FREE(local_mem);
    236    FREE(machines);
    237 }
    238