Home | History | Annotate | Download | only in draw
      1 /**************************************************************************
      2  *
      3  * Copyright 2009 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 #include "draw_gs.h"
     29 
     30 #include "draw_private.h"
     31 #include "draw_context.h"
     32 #ifdef HAVE_LLVM
     33 #include "draw_llvm.h"
     34 #endif
     35 
     36 #include "tgsi/tgsi_parse.h"
     37 #include "tgsi/tgsi_exec.h"
     38 
     39 #include "pipe/p_shader_tokens.h"
     40 
     41 #include "util/u_math.h"
     42 #include "util/u_memory.h"
     43 #include "util/u_prim.h"
     44 
     45 /* FIXME: move this definition out of this file.
      * It sizes the machine->Primitives array allocated in draw_gs_init(). */
     46 #define MAX_PRIMITIVES 64
     47 
     48 static inline int
     49 draw_gs_get_input_index(int semantic, int index,
     50                         const struct tgsi_shader_info *input_info)
     51 {
     52    int i;
     53    const ubyte *input_semantic_names = input_info->output_semantic_name;
     54    const ubyte *input_semantic_indices = input_info->output_semantic_index;
     55    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
     56       if (input_semantic_names[i] == semantic &&
     57           input_semantic_indices[i] == index)
     58          return i;
     59    }
     60    return -1;
     61 }
     62 
     63 /**
     64  * We execute geometry shaders in the SOA mode, so ideally we want to
     65  * flush when the number of currently fetched primitives is equal to
     66  * the number of elements in the SOA vector. This ensures that the
     67  * throughput is optimized for the given vector instruction set.
     68  */
     69 static inline boolean
     70 draw_gs_should_flush(struct draw_geometry_shader *shader)
     71 {
     72    return (shader->fetched_prim_count == shader->vector_length);
     73 }
     74 
     75 /*#define DEBUG_OUTPUTS 1*/
     76 static void
     77 tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader,
     78                       unsigned num_primitives,
     79                       float (**p_output)[4])
     80 {
     81    struct tgsi_exec_machine *machine = shader->machine;
     82    unsigned prim_idx, j, slot;
     83    unsigned current_idx = 0;
     84    float (*output)[4];
     85 
     86    output = *p_output;
     87 
     88    /* Unswizzle all output results.
     89     */
     90 
     91    for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) {
     92       unsigned num_verts_per_prim = machine->Primitives[prim_idx];
     93       shader->primitive_lengths[prim_idx + shader->emitted_primitives] =
     94          machine->Primitives[prim_idx];
     95       shader->emitted_vertices += num_verts_per_prim;
     96       for (j = 0; j < num_verts_per_prim; j++, current_idx++) {
     97          int idx = current_idx * shader->info.num_outputs;
     98 #ifdef DEBUG_OUTPUTS
     99          debug_printf("%d) Output vert:\n", idx / shader->info.num_outputs);
    100 #endif
    101          for (slot = 0; slot < shader->info.num_outputs; slot++) {
    102             output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[0];
    103             output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[0];
    104             output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[0];
    105             output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[0];
    106 #ifdef DEBUG_OUTPUTS
    107             debug_printf("\t%d: %f %f %f %f\n", slot,
    108                          output[slot][0],
    109                          output[slot][1],
    110                          output[slot][2],
    111                          output[slot][3]);
    112 #endif
    113          }
    114          output = (float (*)[4])((char *)output + shader->vertex_size);
    115       }
    116    }
    117    *p_output = output;
    118    shader->emitted_primitives += num_primitives;
    119 }
    120 
    121 /*#define DEBUG_INPUTS 1*/
    122 static void tgsi_fetch_gs_input(struct draw_geometry_shader *shader,
    123                                 unsigned *indices,
    124                                 unsigned num_vertices,
    125                                 unsigned prim_idx)
    126 {
    127    struct tgsi_exec_machine *machine = shader->machine;
    128    unsigned slot, i;
    129    int vs_slot;
    130    unsigned input_vertex_stride = shader->input_vertex_stride;
    131    const float (*input_ptr)[4];
    132 
    133    input_ptr = shader->input;
    134 
    135    for (i = 0; i < num_vertices; ++i) {
    136       const float (*input)[4];
    137 #if DEBUG_INPUTS
    138       debug_printf("%d) vertex index = %d (prim idx = %d)\n",
    139                    i, indices[i], prim_idx);
    140 #endif
    141       input = (const float (*)[4])(
    142          (const char *)input_ptr + (indices[i] * input_vertex_stride));
    143       for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) {
    144          unsigned idx = i * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot;
    145          if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) {
    146             machine->Inputs[idx].xyzw[0].u[prim_idx] = shader->in_prim_idx;
    147             machine->Inputs[idx].xyzw[1].u[prim_idx] = shader->in_prim_idx;
    148             machine->Inputs[idx].xyzw[2].u[prim_idx] = shader->in_prim_idx;
    149             machine->Inputs[idx].xyzw[3].u[prim_idx] = shader->in_prim_idx;
    150          } else {
    151             vs_slot = draw_gs_get_input_index(
    152                shader->info.input_semantic_name[slot],
    153                shader->info.input_semantic_index[slot],
    154                shader->input_info);
    155             if (vs_slot < 0) {
    156                debug_printf("VS/GS signature mismatch!\n");
    157                machine->Inputs[idx].xyzw[0].f[prim_idx] = 0;
    158                machine->Inputs[idx].xyzw[1].f[prim_idx] = 0;
    159                machine->Inputs[idx].xyzw[2].f[prim_idx] = 0;
    160                machine->Inputs[idx].xyzw[3].f[prim_idx] = 0;
    161             } else {
    162 #if DEBUG_INPUTS
    163                debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n",
    164                             slot, vs_slot, idx);
    165                assert(!util_is_inf_or_nan(input[vs_slot][0]));
    166                assert(!util_is_inf_or_nan(input[vs_slot][1]));
    167                assert(!util_is_inf_or_nan(input[vs_slot][2]));
    168                assert(!util_is_inf_or_nan(input[vs_slot][3]));
    169 #endif
    170                machine->Inputs[idx].xyzw[0].f[prim_idx] = input[vs_slot][0];
    171                machine->Inputs[idx].xyzw[1].f[prim_idx] = input[vs_slot][1];
    172                machine->Inputs[idx].xyzw[2].f[prim_idx] = input[vs_slot][2];
    173                machine->Inputs[idx].xyzw[3].f[prim_idx] = input[vs_slot][3];
    174 #if DEBUG_INPUTS
    175                debug_printf("\t\t%f %f %f %f\n",
    176                             machine->Inputs[idx].xyzw[0].f[prim_idx],
    177                             machine->Inputs[idx].xyzw[1].f[prim_idx],
    178                             machine->Inputs[idx].xyzw[2].f[prim_idx],
    179                             machine->Inputs[idx].xyzw[3].f[prim_idx]);
    180 #endif
    181                ++vs_slot;
    182             }
    183          }
    184       }
    185    }
    186 }
    187 
    188 static void tgsi_gs_prepare(struct draw_geometry_shader *shader,
    189                             const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
    190                             const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS])
    191 {
    192    struct tgsi_exec_machine *machine = shader->machine;
    193    int j;
    194    tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS,
    195                                   constants, constants_size);
    196 
    197    if (shader->info.uses_invocationid) {
    198       unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INVOCATIONID];
    199       for (j = 0; j < TGSI_QUAD_SIZE; j++)
    200          machine->SystemValue[i].xyzw[0].i[j] = shader->invocation_id;
    201    }
    202 }
    203 
    204 static unsigned tgsi_gs_run(struct draw_geometry_shader *shader,
    205                             unsigned input_primitives)
    206 {
    207    struct tgsi_exec_machine *machine = shader->machine;
    208 
    209    /* run interpreter */
    210    tgsi_exec_machine_run(machine, 0);
    211 
    212    return
    213       machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0];
    214 }
    215 
    216 #ifdef HAVE_LLVM
    217 
    218 static void
    219 llvm_fetch_gs_input(struct draw_geometry_shader *shader,
    220                     unsigned *indices,
    221                     unsigned num_vertices,
    222                     unsigned prim_idx)
    223 {
    224    unsigned slot, i;
    225    int vs_slot;
    226    unsigned input_vertex_stride = shader->input_vertex_stride;
    227    const float (*input_ptr)[4];
    228    float (*input_data)[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS] = &shader->gs_input->data;
    229 
    230    shader->llvm_prim_ids[shader->fetched_prim_count] = shader->in_prim_idx;
    231 
    232    input_ptr = shader->input;
    233 
    234    for (i = 0; i < num_vertices; ++i) {
    235       const float (*input)[4];
    236 #if DEBUG_INPUTS
    237       debug_printf("%d) vertex index = %d (prim idx = %d)\n",
    238                    i, indices[i], prim_idx);
    239 #endif
    240       input = (const float (*)[4])(
    241          (const char *)input_ptr + (indices[i] * input_vertex_stride));
    242       for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) {
    243          if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) {
    244             /* skip. we handle system values through gallivm */
    245             /* NOTE: If we hit this case here it's an ordinary input not a sv,
    246              * even though it probably should be a sv.
    247              * Not sure how to set it up as regular input however if that even,
    248              * would make sense so hack around this later in gallivm.
    249              */
    250          } else {
    251             vs_slot = draw_gs_get_input_index(
    252                shader->info.input_semantic_name[slot],
    253                shader->info.input_semantic_index[slot],
    254                shader->input_info);
    255             if (vs_slot < 0) {
    256                debug_printf("VS/GS signature mismatch!\n");
    257                (*input_data)[i][slot][0][prim_idx] = 0;
    258                (*input_data)[i][slot][1][prim_idx] = 0;
    259                (*input_data)[i][slot][2][prim_idx] = 0;
    260                (*input_data)[i][slot][3][prim_idx] = 0;
    261             } else {
    262 #if DEBUG_INPUTS
    263                debug_printf("\tSlot = %d, vs_slot = %d, i = %d:\n",
    264                             slot, vs_slot, i);
    265                assert(!util_is_inf_or_nan(input[vs_slot][0]));
    266                assert(!util_is_inf_or_nan(input[vs_slot][1]));
    267                assert(!util_is_inf_or_nan(input[vs_slot][2]));
    268                assert(!util_is_inf_or_nan(input[vs_slot][3]));
    269 #endif
    270                (*input_data)[i][slot][0][prim_idx] = input[vs_slot][0];
    271                (*input_data)[i][slot][1][prim_idx] = input[vs_slot][1];
    272                (*input_data)[i][slot][2][prim_idx] = input[vs_slot][2];
    273                (*input_data)[i][slot][3][prim_idx] = input[vs_slot][3];
    274 #if DEBUG_INPUTS
    275                debug_printf("\t\t%f %f %f %f\n",
    276                             (*input_data)[i][slot][0][prim_idx],
    277                             (*input_data)[i][slot][1][prim_idx],
    278                             (*input_data)[i][slot][2][prim_idx],
    279                             (*input_data)[i][slot][3][prim_idx]);
    280 #endif
    281                ++vs_slot;
    282             }
    283          }
    284       }
    285    }
    286 }
    287 
    288 static void
    289 llvm_fetch_gs_outputs(struct draw_geometry_shader *shader,
    290                       unsigned num_primitives,
    291                       float (**p_output)[4])
    292 {
    293    int total_verts = 0;
    294    int vertex_count = 0;
    295    int total_prims = 0;
    296    int max_prims_per_invocation = 0;
    297    char *output_ptr = (char*)shader->gs_output;
    298    int i, j, prim_idx;
    299    unsigned next_prim_boundary = shader->primitive_boundary;
    300 
    301    for (i = 0; i < shader->vector_length; ++i) {
    302       int prims = shader->llvm_emitted_primitives[i];
    303       total_prims += prims;
    304       max_prims_per_invocation = MAX2(max_prims_per_invocation, prims);
    305    }
    306    for (i = 0; i < shader->vector_length; ++i) {
    307       total_verts += shader->llvm_emitted_vertices[i];
    308    }
    309 
    310    output_ptr += shader->emitted_vertices * shader->vertex_size;
    311    for (i = 0; i < shader->vector_length - 1; ++i) {
    312       int current_verts = shader->llvm_emitted_vertices[i];
    313       int next_verts = shader->llvm_emitted_vertices[i + 1];
    314 #if 0
    315       int j;
    316       for (j = 0; j < current_verts; ++j) {
    317          struct vertex_header *vh = (struct vertex_header *)
    318             (output_ptr + shader->vertex_size * (i * next_prim_boundary + j));
    319          debug_printf("--- %d) [%f, %f, %f, %f]\n", j + vertex_count,
    320                       vh->data[0][0], vh->data[0][1], vh->data[0][2], vh->data[0][3]);
    321 
    322       }
    323 #endif
    324       debug_assert(current_verts <= shader->max_output_vertices);
    325       debug_assert(next_verts <= shader->max_output_vertices);
    326       if (next_verts) {
    327          memmove(output_ptr + (vertex_count + current_verts) * shader->vertex_size,
    328                  output_ptr + ((i + 1) * next_prim_boundary) * shader->vertex_size,
    329                  shader->vertex_size * next_verts);
    330       }
    331       vertex_count += current_verts;
    332    }
    333 
    334 #if 0
    335    {
    336       int i;
    337       for (i = 0; i < total_verts; ++i) {
    338          struct vertex_header *vh = (struct vertex_header *)(output_ptr + shader->vertex_size * i);
    339          debug_printf("%d) Vertex:\n", i);
    340          for (j = 0; j < shader->info.num_outputs; ++j) {
    341             unsigned *udata = (unsigned*)vh->data[j];
    342             debug_printf("    %d) [%f, %f, %f, %f] [%d, %d, %d, %d]\n", j,
    343                          vh->data[j][0], vh->data[j][1], vh->data[j][2], vh->data[j][3],
    344                          udata[0], udata[1], udata[2], udata[3]);
    345          }
    346 
    347       }
    348    }
    349 #endif
    350 
    351    prim_idx = 0;
    352    for (i = 0; i < shader->vector_length; ++i) {
    353       int num_prims = shader->llvm_emitted_primitives[i];
    354       for (j = 0; j < num_prims; ++j) {
    355          int prim_length =
    356             shader->llvm_prim_lengths[j][i];
    357          shader->primitive_lengths[shader->emitted_primitives + prim_idx] =
    358             prim_length;
    359          ++prim_idx;
    360       }
    361    }
    362 
    363    shader->emitted_primitives += total_prims;
    364    shader->emitted_vertices += total_verts;
    365 }
    366 
    367 static void
    368 llvm_gs_prepare(struct draw_geometry_shader *shader,
    369                 const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
    370                 const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS])
    371 {
    372 }
    373 
    374 static unsigned
    375 llvm_gs_run(struct draw_geometry_shader *shader,
    376             unsigned input_primitives)
    377 {
    378    unsigned ret;
    379    char *input = (char*)shader->gs_output;
    380 
    381    input += (shader->emitted_vertices * shader->vertex_size);
    382 
    383    ret = shader->current_variant->jit_func(
    384       shader->jit_context, shader->gs_input->data,
    385       (struct vertex_header*)input,
    386       input_primitives,
    387       shader->draw->instance_id,
    388       shader->llvm_prim_ids,
    389       shader->invocation_id);
    390 
    391    return ret;
    392 }
    393 
    394 #endif
    395 
    396 static void gs_flush(struct draw_geometry_shader *shader)
    397 {
    398    unsigned out_prim_count;
    399 
    400    unsigned input_primitives = shader->fetched_prim_count;
    401 
    402    if (shader->draw->collect_statistics) {
    403       shader->draw->statistics.gs_invocations += input_primitives;
    404    }
    405 
    406    debug_assert(input_primitives > 0 &&
    407                 input_primitives <= 4);
    408 
    409    out_prim_count = shader->run(shader, input_primitives);
    410    shader->fetch_outputs(shader, out_prim_count,
    411                          &shader->tmp_output);
    412 
    413 #if 0
    414    debug_printf("PRIM emitted prims = %d (verts=%d), cur prim count = %d\n",
    415                 shader->emitted_primitives, shader->emitted_vertices,
    416                 out_prim_count);
    417 #endif
    418 
    419    shader->fetched_prim_count = 0;
    420 }
    421 
    422 static void gs_point(struct draw_geometry_shader *shader,
    423                      int idx)
    424 {
    425    unsigned indices[1];
    426 
    427    indices[0] = idx;
    428 
    429    shader->fetch_inputs(shader, indices, 1,
    430                         shader->fetched_prim_count);
    431    ++shader->in_prim_idx;
    432    ++shader->fetched_prim_count;
    433 
    434    if (draw_gs_should_flush(shader))
    435       gs_flush(shader);
    436 }
    437 
    438 static void gs_line(struct draw_geometry_shader *shader,
    439                     int i0, int i1)
    440 {
    441    unsigned indices[2];
    442 
    443    indices[0] = i0;
    444    indices[1] = i1;
    445 
    446    shader->fetch_inputs(shader, indices, 2,
    447                         shader->fetched_prim_count);
    448    ++shader->in_prim_idx;
    449    ++shader->fetched_prim_count;
    450 
    451    if (draw_gs_should_flush(shader))
    452       gs_flush(shader);
    453 }
    454 
    455 static void gs_line_adj(struct draw_geometry_shader *shader,
    456                         int i0, int i1, int i2, int i3)
    457 {
    458    unsigned indices[4];
    459 
    460    indices[0] = i0;
    461    indices[1] = i1;
    462    indices[2] = i2;
    463    indices[3] = i3;
    464 
    465    shader->fetch_inputs(shader, indices, 4,
    466                         shader->fetched_prim_count);
    467    ++shader->in_prim_idx;
    468    ++shader->fetched_prim_count;
    469 
    470    if (draw_gs_should_flush(shader))
    471       gs_flush(shader);
    472 }
    473 
    474 static void gs_tri(struct draw_geometry_shader *shader,
    475                    int i0, int i1, int i2)
    476 {
    477    unsigned indices[3];
    478 
    479    indices[0] = i0;
    480    indices[1] = i1;
    481    indices[2] = i2;
    482 
    483    shader->fetch_inputs(shader, indices, 3,
    484                         shader->fetched_prim_count);
    485    ++shader->in_prim_idx;
    486    ++shader->fetched_prim_count;
    487 
    488    if (draw_gs_should_flush(shader))
    489       gs_flush(shader);
    490 }
    491 
    492 static void gs_tri_adj(struct draw_geometry_shader *shader,
    493                        int i0, int i1, int i2,
    494                        int i3, int i4, int i5)
    495 {
    496    unsigned indices[6];
    497 
    498    indices[0] = i0;
    499    indices[1] = i1;
    500    indices[2] = i2;
    501    indices[3] = i3;
    502    indices[4] = i4;
    503    indices[5] = i5;
    504 
    505    shader->fetch_inputs(shader, indices, 6,
    506                         shader->fetched_prim_count);
    507    ++shader->in_prim_idx;
    508    ++shader->fetched_prim_count;
    509 
    510    if (draw_gs_should_flush(shader))
    511       gs_flush(shader);
    512 }
    513 
    /* draw_gs_tmp.h is included twice with different macro settings.
     * NOTE(review): FUNC presumably names the function the header defines and
     * GET_ELT presumably maps a position to a vertex index -- confirm against
     * draw_gs_tmp.h.
     *
     * First instantiation: gs_run, where GET_ELT passes the position through
     * unchanged. draw_geometry_shader_run() calls it when input_prim->linear
     * is set.
     */
    514 #define FUNC         gs_run
    515 #define GET_ELT(idx) (idx)
    516 #include "draw_gs_tmp.h"
    517 
    518 
    /* Second instantiation: gs_run_elts, where LOCAL_VARS declares elts from
     * input_prims->elts and GET_ELT looks the index up in that array.
     * draw_geometry_shader_run() calls it when input_prim->linear is not set.
     */
    519 #define FUNC         gs_run_elts
    520 #define LOCAL_VARS   const ushort *elts = input_prims->elts;
    521 #define GET_ELT(idx) (elts[idx])
    522 #include "draw_gs_tmp.h"
    523 
    524 
    525 /**
    526  * Execute geometry shader.
    527  */
    528 int draw_geometry_shader_run(struct draw_geometry_shader *shader,
    529                              const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
    530                              const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
    531                              const struct draw_vertex_info *input_verts,
    532                              const struct draw_prim_info *input_prim,
    533                              const struct tgsi_shader_info *input_info,
    534                              struct draw_vertex_info *output_verts,
    535                              struct draw_prim_info *output_prims )
    536 {
    537    const float (*input)[4] = (const float (*)[4])input_verts->verts->data;
    538    unsigned input_stride = input_verts->vertex_size;
    539    unsigned num_outputs = draw_total_gs_outputs(shader->draw);
    540    unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float);
    541    unsigned num_input_verts = input_prim->linear ?
    542       input_verts->count :
    543       input_prim->count;
    544    unsigned num_in_primitives =
    545       align(
    546          MAX2(u_decomposed_prims_for_vertices(input_prim->prim,
    547                                               num_input_verts),
    548               u_decomposed_prims_for_vertices(shader->input_primitive,
    549                                               num_input_verts)),
    550          shader->vector_length);
    551    unsigned max_out_prims =
    552       u_decomposed_prims_for_vertices(shader->output_primitive,
    553                                       shader->max_output_vertices)
    554       * num_in_primitives;
    555    /* we allocate exactly one extra vertex per primitive to allow the GS to emit
    556     * overflown vertices into some area where they won't harm anyone */
    557    unsigned total_verts_per_buffer = shader->primitive_boundary *
    558       num_in_primitives;
    559    unsigned invocation;
    560    //Assume at least one primitive
    561    max_out_prims = MAX2(max_out_prims, 1);
    562 
    563 
    564    output_verts->vertex_size = vertex_size;
    565    output_verts->stride = output_verts->vertex_size;
    566    output_verts->verts =
    567       (struct vertex_header *)MALLOC(output_verts->vertex_size *
    568                                      total_verts_per_buffer * shader->num_invocations);
    569    debug_assert(output_verts->verts);
    570 
    571 #if 0
    572    debug_printf("%s count = %d (in prims # = %d)\n",
    573                 __FUNCTION__, num_input_verts, num_in_primitives);
    574    debug_printf("\tlinear = %d, prim_info->count = %d\n",
    575                 input_prim->linear, input_prim->count);
    576    debug_printf("\tprim pipe = %s, shader in = %s, shader out = %s\n"
    577                 u_prim_name(input_prim->prim),
    578                 u_prim_name(shader->input_primitive),
    579                 u_prim_name(shader->output_primitive));
    580    debug_printf("\tmaxv  = %d, maxp = %d, primitive_boundary = %d, "
    581                 "vertex_size = %d, tverts = %d\n",
    582                 shader->max_output_vertices, max_out_prims,
    583                 shader->primitive_boundary, output_verts->vertex_size,
    584                 total_verts_per_buffer);
    585 #endif
    586 
    587    shader->emitted_vertices = 0;
    588    shader->emitted_primitives = 0;
    589    shader->vertex_size = vertex_size;
    590    shader->tmp_output = (float (*)[4])output_verts->verts->data;
    591    shader->fetched_prim_count = 0;
    592    shader->input_vertex_stride = input_stride;
    593    shader->input = input;
    594    shader->input_info = input_info;
    595    FREE(shader->primitive_lengths);
    596    shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned) * shader->num_invocations);
    597 
    598 
    599 #ifdef HAVE_LLVM
    600    if (shader->draw->llvm) {
    601       shader->gs_output = output_verts->verts;
    602       if (max_out_prims > shader->max_out_prims) {
    603          unsigned i;
    604          if (shader->llvm_prim_lengths) {
    605             for (i = 0; i < shader->max_out_prims; ++i) {
    606                align_free(shader->llvm_prim_lengths[i]);
    607             }
    608             FREE(shader->llvm_prim_lengths);
    609          }
    610 
    611          shader->llvm_prim_lengths = MALLOC(max_out_prims * sizeof(unsigned*));
    612          for (i = 0; i < max_out_prims; ++i) {
    613             int vector_size = shader->vector_length * sizeof(unsigned);
    614             shader->llvm_prim_lengths[i] =
    615                align_malloc(vector_size, vector_size);
    616          }
    617 
    618          shader->max_out_prims = max_out_prims;
    619       }
    620       shader->jit_context->prim_lengths = shader->llvm_prim_lengths;
    621       shader->jit_context->emitted_vertices = shader->llvm_emitted_vertices;
    622       shader->jit_context->emitted_prims = shader->llvm_emitted_primitives;
    623    }
    624 #endif
    625 
    626    for (invocation = 0; invocation < shader->num_invocations; invocation++) {
    627       shader->invocation_id = invocation;
    628 
    629       shader->prepare(shader, constants, constants_size);
    630 
    631       if (input_prim->linear)
    632          gs_run(shader, input_prim, input_verts,
    633                 output_prims, output_verts);
    634       else
    635          gs_run_elts(shader, input_prim, input_verts,
    636                      output_prims, output_verts);
    637 
    638       /* Flush the remaining primitives. Will happen if
    639        * num_input_primitives % 4 != 0
    640        */
    641       if (shader->fetched_prim_count > 0) {
    642          gs_flush(shader);
    643       }
    644       debug_assert(shader->fetched_prim_count == 0);
    645    }
    646 
    647    /* Update prim_info:
    648     */
    649    output_prims->linear = TRUE;
    650    output_prims->elts = NULL;
    651    output_prims->start = 0;
    652    output_prims->count = shader->emitted_vertices;
    653    output_prims->prim = shader->output_primitive;
    654    output_prims->flags = 0x0;
    655    output_prims->primitive_lengths = shader->primitive_lengths;
    656    output_prims->primitive_count = shader->emitted_primitives;
    657    output_verts->count = shader->emitted_vertices;
    658 
    659    if (shader->draw->collect_statistics) {
    660       unsigned i;
    661       for (i = 0; i < shader->emitted_primitives; ++i) {
    662          shader->draw->statistics.gs_primitives +=
    663             u_decomposed_prims_for_vertices(shader->output_primitive,
    664                                             shader->primitive_lengths[i]);
    665       }
    666    }
    667 
    668 #if 0
    669    debug_printf("GS finished, prims = %d, verts = %d\n",
    670                 output_prims->primitive_count,
    671                 output_verts->count);
    672 #endif
    673 
    674    return shader->emitted_vertices;
    675 }
    676 
    677 void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
    678                                   struct draw_context *draw)
    679 {
    680    boolean use_llvm = draw->llvm != NULL;
    681    if (!use_llvm && shader && shader->machine->Tokens != shader->state.tokens) {
    682       tgsi_exec_machine_bind_shader(shader->machine,
    683                                     shader->state.tokens,
    684                                     draw->gs.tgsi.sampler,
    685                                     draw->gs.tgsi.image,
    686                                     draw->gs.tgsi.buffer);
    687    }
    688 }
    689 
    690 
    691 boolean
    692 draw_gs_init( struct draw_context *draw )
    693 {
    694    if (!draw->llvm) {
    695       draw->gs.tgsi.machine = tgsi_exec_machine_create(PIPE_SHADER_GEOMETRY);
    696       if (!draw->gs.tgsi.machine)
    697          return FALSE;
    698 
    699       draw->gs.tgsi.machine->Primitives = align_malloc(
    700          MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16);
    701       if (!draw->gs.tgsi.machine->Primitives)
    702          return FALSE;
    703       memset(draw->gs.tgsi.machine->Primitives, 0,
    704              MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector));
    705    }
    706 
    707    return TRUE;
    708 }
    709 
    710 void draw_gs_destroy( struct draw_context *draw )
    711 {
    712    if (draw->gs.tgsi.machine) {
    713       align_free(draw->gs.tgsi.machine->Primitives);
    714       tgsi_exec_machine_destroy(draw->gs.tgsi.machine);
    715    }
    716 }
    717 
    718 struct draw_geometry_shader *
    719 draw_create_geometry_shader(struct draw_context *draw,
    720                             const struct pipe_shader_state *state)
    721 {
    722 #ifdef HAVE_LLVM
    723    boolean use_llvm = draw->llvm != NULL;
    724    struct llvm_geometry_shader *llvm_gs = NULL;
    725 #endif
    726    struct draw_geometry_shader *gs;
    727    unsigned i;
    728 
    729 #ifdef HAVE_LLVM
    730    if (use_llvm) {
    731       llvm_gs = CALLOC_STRUCT(llvm_geometry_shader);
    732 
    733       if (!llvm_gs)
    734          return NULL;
    735 
    736       gs = &llvm_gs->base;
    737 
    738       make_empty_list(&llvm_gs->variants);
    739    } else
    740 #endif
    741    {
    742       gs = CALLOC_STRUCT(draw_geometry_shader);
    743    }
    744 
    745    if (!gs)
    746       return NULL;
    747 
    748    gs->draw = draw;
    749    gs->state = *state;
    750    gs->state.tokens = tgsi_dup_tokens(state->tokens);
    751    if (!gs->state.tokens) {
    752       FREE(gs);
    753       return NULL;
    754    }
    755 
    756    tgsi_scan_shader(state->tokens, &gs->info);
    757 
    758    /* setup the defaults */
    759    gs->max_out_prims = 0;
    760 
    761 #ifdef HAVE_LLVM
    762    if (use_llvm) {
    763       /* TODO: change the input array to handle the following
    764          vector length, instead of the currently hardcoded
    765          TGSI_NUM_CHANNELS
    766       gs->vector_length = lp_native_vector_width / 32;*/
    767       gs->vector_length = TGSI_NUM_CHANNELS;
    768    } else
    769 #endif
    770    {
    771       gs->vector_length = 1;
    772    }
    773 
    774    gs->input_primitive =
    775          gs->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM];
    776    gs->output_primitive =
    777          gs->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
    778    gs->max_output_vertices =
    779          gs->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
    780    gs->num_invocations =
    781       gs->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
    782    if (!gs->max_output_vertices)
    783       gs->max_output_vertices = 32;
    784 
    785    /* Primitive boundary is bigger than max_output_vertices by one, because
    786     * the specification says that the geometry shader should exit if the
    787     * number of emitted vertices is bigger or equal to max_output_vertices and
    788     * we can't do that because we're running in the SoA mode, which means that
    789     * our storing routines will keep getting called on channels that have
    790     * overflown.
    791     * So we need some scratch area where we can keep writing the overflown
    792     * vertices without overwriting anything important or crashing.
    793     */
    794    gs->primitive_boundary = gs->max_output_vertices + 1;
    795 
    796    gs->position_output = -1;
    797    for (i = 0; i < gs->info.num_outputs; i++) {
    798       if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
    799           gs->info.output_semantic_index[i] == 0)
    800          gs->position_output = i;
    801       if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX)
    802          gs->viewport_index_output = i;
    803       if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
    804          debug_assert(gs->info.output_semantic_index[i] <
    805                       PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT);
    806          gs->ccdistance_output[gs->info.output_semantic_index[i]] = i;
    807       }
    808    }
    809 
    810    gs->machine = draw->gs.tgsi.machine;
    811 
    812 #ifdef HAVE_LLVM
    813    if (use_llvm) {
    814       int vector_size = gs->vector_length * sizeof(float);
    815       gs->gs_input = align_malloc(sizeof(struct draw_gs_inputs), 16);
    816       memset(gs->gs_input, 0, sizeof(struct draw_gs_inputs));
    817       gs->llvm_prim_lengths = 0;
    818 
    819       gs->llvm_emitted_primitives = align_malloc(vector_size, vector_size);
    820       gs->llvm_emitted_vertices = align_malloc(vector_size, vector_size);
    821       gs->llvm_prim_ids = align_malloc(vector_size, vector_size);
    822 
    823       gs->fetch_outputs = llvm_fetch_gs_outputs;
    824       gs->fetch_inputs = llvm_fetch_gs_input;
    825       gs->prepare = llvm_gs_prepare;
    826       gs->run = llvm_gs_run;
    827 
    828       gs->jit_context = &draw->llvm->gs_jit_context;
    829 
    830 
    831       llvm_gs->variant_key_size =
    832          draw_gs_llvm_variant_key_size(
    833             MAX2(gs->info.file_max[TGSI_FILE_SAMPLER]+1,
    834                  gs->info.file_max[TGSI_FILE_SAMPLER_VIEW]+1));
    835    } else
    836 #endif
    837    {
    838       gs->fetch_outputs = tgsi_fetch_gs_outputs;
    839       gs->fetch_inputs = tgsi_fetch_gs_input;
    840       gs->prepare = tgsi_gs_prepare;
    841       gs->run = tgsi_gs_run;
    842    }
    843 
    844    return gs;
    845 }
    846 
    847 void draw_bind_geometry_shader(struct draw_context *draw,
    848                                struct draw_geometry_shader *dgs)
    849 {
    850    draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
    851 
    852    if (dgs) {
    853       draw->gs.geometry_shader = dgs;
    854       draw->gs.num_gs_outputs = dgs->info.num_outputs;
    855       draw->gs.position_output = dgs->position_output;
    856       draw_geometry_shader_prepare(dgs, draw);
    857    }
    858    else {
    859       draw->gs.geometry_shader = NULL;
    860       draw->gs.num_gs_outputs = 0;
    861    }
    862 }
    863 
    864 void draw_delete_geometry_shader(struct draw_context *draw,
    865                                  struct draw_geometry_shader *dgs)
    866 {
    867    if (!dgs) {
    868       return;
    869    }
    870 #ifdef HAVE_LLVM
    871    if (draw->llvm) {
    872       struct llvm_geometry_shader *shader = llvm_geometry_shader(dgs);
    873       struct draw_gs_llvm_variant_list_item *li;
    874 
    875       li = first_elem(&shader->variants);
    876       while(!at_end(&shader->variants, li)) {
    877          struct draw_gs_llvm_variant_list_item *next = next_elem(li);
    878          draw_gs_llvm_destroy_variant(li->base);
    879          li = next;
    880       }
    881 
    882       assert(shader->variants_cached == 0);
    883 
    884       if (dgs->llvm_prim_lengths) {
    885          unsigned i;
    886          for (i = 0; i < dgs->max_out_prims; ++i) {
    887             align_free(dgs->llvm_prim_lengths[i]);
    888          }
    889          FREE(dgs->llvm_prim_lengths);
    890       }
    891       align_free(dgs->llvm_emitted_primitives);
    892       align_free(dgs->llvm_emitted_vertices);
    893       align_free(dgs->llvm_prim_ids);
    894 
    895       align_free(dgs->gs_input);
    896    }
    897 #endif
    898 
    899    FREE(dgs->primitive_lengths);
    900    FREE((void*) dgs->state.tokens);
    901    FREE(dgs);
    902 }
    903 
    904 
    905 #ifdef HAVE_LLVM
    906 void draw_gs_set_current_variant(struct draw_geometry_shader *shader,
    907                                  struct draw_gs_llvm_variant *variant)
    908 {
    909    shader->current_variant = variant;
    910 }
    911 #endif
    912 
    913 /*
    914  * Called at the very begin of the draw call with a new instance
    915  * Used to reset state that should persist between primitive restart.
    916  */
    917 void
    918 draw_geometry_shader_new_instance(struct draw_geometry_shader *gs)
    919 {
    920    if (!gs)
    921       return;
    922 
    923    gs->in_prim_idx = 0;
    924 }
    925