Home | History | Annotate | Download | only in draw
      1 /**************************************************************************
      2  *
      3  * Copyright 2010 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 #include "draw_llvm.h"
     29 
     30 #include "draw_context.h"
     31 #include "draw_vs.h"
     32 #include "draw_gs.h"
     33 
     34 #include "gallivm/lp_bld_arit.h"
     35 #include "gallivm/lp_bld_arit_overflow.h"
     36 #include "gallivm/lp_bld_bitarit.h"
     37 #include "gallivm/lp_bld_gather.h"
     38 #include "gallivm/lp_bld_logic.h"
     39 #include "gallivm/lp_bld_const.h"
     40 #include "gallivm/lp_bld_swizzle.h"
     41 #include "gallivm/lp_bld_struct.h"
     42 #include "gallivm/lp_bld_type.h"
     43 #include "gallivm/lp_bld_flow.h"
     44 #include "gallivm/lp_bld_debug.h"
     45 #include "gallivm/lp_bld_tgsi.h"
     46 #include "gallivm/lp_bld_printf.h"
     47 #include "gallivm/lp_bld_intr.h"
     48 #include "gallivm/lp_bld_init.h"
     49 #include "gallivm/lp_bld_type.h"
     50 #include "gallivm/lp_bld_pack.h"
     51 #include "gallivm/lp_bld_format.h"
     52 
     53 #include "tgsi/tgsi_exec.h"
     54 #include "tgsi/tgsi_dump.h"
     55 
     56 #include "util/u_math.h"
     57 #include "util/u_pointer.h"
     58 #include "util/u_string.h"
     59 #include "util/simple_list.h"
     60 
     61 
     62 #define DEBUG_STORE 0
     63 
     64 
     65 static void
     66 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
     67 
     68 
     69 struct draw_gs_llvm_iface {
     70    struct lp_build_tgsi_gs_iface base;
     71 
     72    struct draw_gs_llvm_variant *variant;
     73    LLVMValueRef input;
     74 };
     75 
     76 static inline const struct draw_gs_llvm_iface *
     77 draw_gs_llvm_iface(const struct lp_build_tgsi_gs_iface *iface)
     78 {
     79    return (const struct draw_gs_llvm_iface *)iface;
     80 }
     81 
     82 /**
     83  * Create LLVM type for draw_vertex_buffer.
     84  */
     85 static LLVMTypeRef
     86 create_jit_dvbuffer_type(struct gallivm_state *gallivm,
     87                          const char *struct_name)
     88 {
     89    LLVMTargetDataRef target = gallivm->target;
     90    LLVMTypeRef dvbuffer_type;
     91    LLVMTypeRef elem_types[DRAW_JIT_DVBUFFER_NUM_FIELDS];
     92    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
     93 
     94    elem_types[DRAW_JIT_DVBUFFER_MAP] =
     95       LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
     96    elem_types[DRAW_JIT_DVBUFFER_SIZE] = int32_type;
     97 
     98    dvbuffer_type = LLVMStructTypeInContext(gallivm->context, elem_types,
     99                                            ARRAY_SIZE(elem_types), 0);
    100 
    101    (void) target; /* silence unused var warning for non-debug build */
    102    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, map,
    103                           target, dvbuffer_type,
    104                           DRAW_JIT_DVBUFFER_MAP);
    105    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, size,
    106                           target, dvbuffer_type,
    107                           DRAW_JIT_DVBUFFER_SIZE);
    108 
    109    return dvbuffer_type;
    110 }
    111 
    112 /**
    113  * Create LLVM type for struct draw_jit_texture
    114  */
    115 static LLVMTypeRef
    116 create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name)
    117 {
    118    LLVMTargetDataRef target = gallivm->target;
    119    LLVMTypeRef texture_type;
    120    LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
    121    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
    122 
    123    elem_types[DRAW_JIT_TEXTURE_WIDTH]  =
    124    elem_types[DRAW_JIT_TEXTURE_HEIGHT] =
    125    elem_types[DRAW_JIT_TEXTURE_DEPTH] =
    126    elem_types[DRAW_JIT_TEXTURE_FIRST_LEVEL] =
    127    elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = int32_type;
    128    elem_types[DRAW_JIT_TEXTURE_BASE] =
    129       LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
    130    elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
    131    elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
    132    elem_types[DRAW_JIT_TEXTURE_MIP_OFFSETS] =
    133       LLVMArrayType(int32_type, PIPE_MAX_TEXTURE_LEVELS);
    134 
    135    texture_type = LLVMStructTypeInContext(gallivm->context, elem_types,
    136                                           ARRAY_SIZE(elem_types), 0);
    137 
    138    (void) target; /* silence unused var warning for non-debug build */
    139    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
    140                           target, texture_type,
    141                           DRAW_JIT_TEXTURE_WIDTH);
    142    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
    143                           target, texture_type,
    144                           DRAW_JIT_TEXTURE_HEIGHT);
    145    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
    146                           target, texture_type,
    147                           DRAW_JIT_TEXTURE_DEPTH);
    148    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, first_level,
    149                           target, texture_type,
    150                           DRAW_JIT_TEXTURE_FIRST_LEVEL);
    151    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
    152                           target, texture_type,
    153                           DRAW_JIT_TEXTURE_LAST_LEVEL);
    154    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, base,
    155                           target, texture_type,
    156                           DRAW_JIT_TEXTURE_BASE);
    157    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
    158                           target, texture_type,
    159                           DRAW_JIT_TEXTURE_ROW_STRIDE);
    160    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
    161                           target, texture_type,
    162                           DRAW_JIT_TEXTURE_IMG_STRIDE);
    163    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, mip_offsets,
    164                           target, texture_type,
    165                           DRAW_JIT_TEXTURE_MIP_OFFSETS);
    166 
    167    LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, target, texture_type);
    168 
    169    return texture_type;
    170 }
    171 
    172 
    173 /**
    174  * Create LLVM type for struct draw_jit_sampler
    175  */
    176 static LLVMTypeRef
    177 create_jit_sampler_type(struct gallivm_state *gallivm, const char *struct_name)
    178 {
    179    LLVMTargetDataRef target = gallivm->target;
    180    LLVMTypeRef sampler_type;
    181    LLVMTypeRef elem_types[DRAW_JIT_SAMPLER_NUM_FIELDS];
    182 
    183    elem_types[DRAW_JIT_SAMPLER_MIN_LOD] =
    184    elem_types[DRAW_JIT_SAMPLER_MAX_LOD] =
    185    elem_types[DRAW_JIT_SAMPLER_LOD_BIAS] = LLVMFloatTypeInContext(gallivm->context);
    186    elem_types[DRAW_JIT_SAMPLER_BORDER_COLOR] =
    187       LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
    188 
    189    sampler_type = LLVMStructTypeInContext(gallivm->context, elem_types,
    190                                           ARRAY_SIZE(elem_types), 0);
    191 
    192    (void) target; /* silence unused var warning for non-debug build */
    193    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, min_lod,
    194                           target, sampler_type,
    195                           DRAW_JIT_SAMPLER_MIN_LOD);
    196    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, max_lod,
    197                           target, sampler_type,
    198                           DRAW_JIT_SAMPLER_MAX_LOD);
    199    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, lod_bias,
    200                           target, sampler_type,
    201                           DRAW_JIT_SAMPLER_LOD_BIAS);
    202    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, border_color,
    203                           target, sampler_type,
    204                           DRAW_JIT_SAMPLER_BORDER_COLOR);
    205 
    206    LP_CHECK_STRUCT_SIZE(struct draw_jit_sampler, target, sampler_type);
    207 
    208    return sampler_type;
    209 }
    210 
    211 
    212 /**
    213  * Create LLVM type for struct draw_jit_context
    214  */
    215 static LLVMTypeRef
    216 create_jit_context_type(struct gallivm_state *gallivm,
    217                         LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
    218                         const char *struct_name)
    219 {
    220    LLVMTargetDataRef target = gallivm->target;
    221    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
    222    LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
    223    LLVMTypeRef elem_types[DRAW_JIT_CTX_NUM_FIELDS];
    224    LLVMTypeRef context_type;
    225 
    226    elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* vs_constants */
    227                                  LP_MAX_TGSI_CONST_BUFFERS);
    228    elem_types[1] = LLVMArrayType(int_type, /* num_vs_constants */
    229                                  LP_MAX_TGSI_CONST_BUFFERS);
    230    elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
    231                                                  DRAW_TOTAL_CLIP_PLANES), 0);
    232    elem_types[3] = LLVMPointerType(float_type, 0); /* viewports */
    233    elem_types[4] = LLVMArrayType(texture_type,
    234                                  PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
    235    elem_types[5] = LLVMArrayType(sampler_type,
    236                                  PIPE_MAX_SAMPLERS); /* samplers */
    237    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
    238                                           ARRAY_SIZE(elem_types), 0);
    239 
    240    (void) target; /* silence unused var warning for non-debug build */
    241    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
    242                           target, context_type, DRAW_JIT_CTX_CONSTANTS);
    243    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, num_vs_constants,
    244                           target, context_type, DRAW_JIT_CTX_NUM_CONSTANTS);
    245    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes,
    246                           target, context_type, DRAW_JIT_CTX_PLANES);
    247    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, viewports,
    248                           target, context_type, DRAW_JIT_CTX_VIEWPORT);
    249    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
    250                           target, context_type,
    251                           DRAW_JIT_CTX_TEXTURES);
    252    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, samplers,
    253                           target, context_type,
    254                           DRAW_JIT_CTX_SAMPLERS);
    255    LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
    256                         target, context_type);
    257 
    258    return context_type;
    259 }
    260 
    261 
    262 /**
    263  * Create LLVM type for struct draw_gs_jit_context
    264  */
    265 static LLVMTypeRef
    266 create_gs_jit_context_type(struct gallivm_state *gallivm,
    267                            unsigned vector_length,
    268                            LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
    269                            const char *struct_name)
    270 {
    271    LLVMTargetDataRef target = gallivm->target;
    272    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
    273    LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
    274    LLVMTypeRef elem_types[DRAW_GS_JIT_CTX_NUM_FIELDS];
    275    LLVMTypeRef context_type;
    276 
    277    elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */
    278                                  LP_MAX_TGSI_CONST_BUFFERS);
    279    elem_types[1] = LLVMArrayType(int_type, /* num_constants */
    280                                  LP_MAX_TGSI_CONST_BUFFERS);
    281    elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
    282                                                  DRAW_TOTAL_CLIP_PLANES), 0);
    283    elem_types[3] = LLVMPointerType(float_type, 0); /* viewports */
    284 
    285    elem_types[4] = LLVMArrayType(texture_type,
    286                                  PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
    287    elem_types[5] = LLVMArrayType(sampler_type,
    288                                  PIPE_MAX_SAMPLERS); /* samplers */
    289 
    290    elem_types[6] = LLVMPointerType(LLVMPointerType(int_type, 0), 0);
    291    elem_types[7] = LLVMPointerType(LLVMVectorType(int_type,
    292                                                   vector_length), 0);
    293    elem_types[8] = LLVMPointerType(LLVMVectorType(int_type,
    294                                                   vector_length), 0);
    295 
    296    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
    297                                           ARRAY_SIZE(elem_types), 0);
    298 
    299    (void) target; /* silence unused var warning for non-debug build */
    300    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, constants,
    301                           target, context_type, DRAW_GS_JIT_CTX_CONSTANTS);
    302    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, num_constants,
    303                           target, context_type, DRAW_GS_JIT_CTX_NUM_CONSTANTS);
    304    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes,
    305                           target, context_type, DRAW_GS_JIT_CTX_PLANES);
    306    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewports,
    307                           target, context_type, DRAW_GS_JIT_CTX_VIEWPORT);
    308    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, textures,
    309                           target, context_type,
    310                           DRAW_GS_JIT_CTX_TEXTURES);
    311    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, samplers,
    312                           target, context_type,
    313                           DRAW_GS_JIT_CTX_SAMPLERS);
    314    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths,
    315                           target, context_type,
    316                           DRAW_GS_JIT_CTX_PRIM_LENGTHS);
    317    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices,
    318                           target, context_type,
    319                           DRAW_GS_JIT_CTX_EMITTED_VERTICES);
    320    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims,
    321                           target, context_type,
    322                           DRAW_GS_JIT_CTX_EMITTED_PRIMS);
    323    LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context,
    324                         target, context_type);
    325 
    326    return context_type;
    327 }
    328 
    329 
    330 static LLVMTypeRef
    331 create_gs_jit_input_type(struct gallivm_state *gallivm)
    332 {
    333    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
    334    LLVMTypeRef input_array;
    335 
    336    input_array = LLVMVectorType(float_type, TGSI_NUM_CHANNELS); /* num primitives */
    337    input_array = LLVMArrayType(input_array, TGSI_NUM_CHANNELS); /* num channels */
    338    input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
    339    input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */
    340 
    341    return input_array;
    342 }
    343 
    344 /**
    345  * Create LLVM type for struct pipe_vertex_buffer
    346  */
    347 static LLVMTypeRef
    348 create_jit_vertex_buffer_type(struct gallivm_state *gallivm,
    349                               const char *struct_name)
    350 {
    351    LLVMTargetDataRef target = gallivm->target;
    352    LLVMTypeRef elem_types[4];
    353    LLVMTypeRef vb_type;
    354 
    355    elem_types[0] =
    356    elem_types[1] = LLVMInt32TypeInContext(gallivm->context);
    357    elem_types[2] =
    358    elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
    359 
    360    vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
    361                                      ARRAY_SIZE(elem_types), 0);
    362 
    363    (void) target; /* silence unused var warning for non-debug build */
    364    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
    365                           target, vb_type, 0);
    366    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
    367                           target, vb_type, 1);
    368 
    369    LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);
    370 
    371    return vb_type;
    372 }
    373 
    374 
    375 /**
    376  * Create LLVM type for struct vertex_header;
    377  */
    378 static LLVMTypeRef
    379 create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
    380 {
    381    LLVMTargetDataRef target = gallivm->target;
    382    LLVMTypeRef elem_types[3];
    383    LLVMTypeRef vertex_header;
    384    char struct_name[24];
    385 
    386    util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
    387 
    388    elem_types[DRAW_JIT_VERTEX_VERTEX_ID]  = LLVMIntTypeInContext(gallivm->context, 32);
    389    elem_types[DRAW_JIT_VERTEX_CLIP_POS]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
    390    elem_types[DRAW_JIT_VERTEX_DATA]  = LLVMArrayType(elem_types[1], data_elems);
    391 
    392    vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,
    393                                            ARRAY_SIZE(elem_types), 0);
    394 
    395    /* these are bit-fields and we can't take address of them
    396       LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
    397       target, vertex_header,
    398       DRAW_JIT_VERTEX_CLIPMASK);
    399       LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
    400       target, vertex_header,
    401       DRAW_JIT_VERTEX_EDGEFLAG);
    402       LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
    403       target, vertex_header,
    404       DRAW_JIT_VERTEX_PAD);
    405       LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
    406       target, vertex_header,
    407       DRAW_JIT_VERTEX_VERTEX_ID);
    408    */
    409    (void) target; /* silence unused var warning for non-debug build */
    410    LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip_pos,
    411                           target, vertex_header,
    412                           DRAW_JIT_VERTEX_CLIP_POS);
    413    LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
    414                           target, vertex_header,
    415                           DRAW_JIT_VERTEX_DATA);
    416 
    417    assert(LLVMABISizeOfType(target, vertex_header) ==
    418           offsetof(struct vertex_header, data[data_elems]));
    419 
    420    return vertex_header;
    421 }
    422 
    423 
    424 /**
    425  * Create LLVM types for various structures.
    426  */
    427 static void
    428 create_jit_types(struct draw_llvm_variant *variant)
    429 {
    430    struct gallivm_state *gallivm = variant->gallivm;
    431    LLVMTypeRef texture_type, sampler_type, context_type, buffer_type,
    432       vb_type;
    433 
    434    texture_type = create_jit_texture_type(gallivm, "texture");
    435    sampler_type = create_jit_sampler_type(gallivm, "sampler");
    436 
    437    context_type = create_jit_context_type(gallivm, texture_type, sampler_type,
    438                                           "draw_jit_context");
    439    variant->context_ptr_type = LLVMPointerType(context_type, 0);
    440 
    441    buffer_type = create_jit_dvbuffer_type(gallivm, "draw_vertex_buffer");
    442    variant->buffer_ptr_type = LLVMPointerType(buffer_type, 0);
    443 
    444    vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer");
    445    variant->vb_ptr_type = LLVMPointerType(vb_type, 0);
    446 }
    447 
    448 
    449 static LLVMTypeRef
    450 get_context_ptr_type(struct draw_llvm_variant *variant)
    451 {
    452    if (!variant->context_ptr_type)
    453       create_jit_types(variant);
    454    return variant->context_ptr_type;
    455 }
    456 
    457 
    458 static LLVMTypeRef
    459 get_buffer_ptr_type(struct draw_llvm_variant *variant)
    460 {
    461    if (!variant->buffer_ptr_type)
    462       create_jit_types(variant);
    463    return variant->buffer_ptr_type;
    464 }
    465 
    466 
    467 static LLVMTypeRef
    468 get_vb_ptr_type(struct draw_llvm_variant *variant)
    469 {
    470    if (!variant->vb_ptr_type)
    471       create_jit_types(variant);
    472    return variant->vb_ptr_type;
    473 }
    474 
    475 static LLVMTypeRef
    476 get_vertex_header_ptr_type(struct draw_llvm_variant *variant)
    477 {
    478    if (!variant->vertex_header_ptr_type)
    479       create_jit_types(variant);
    480    return variant->vertex_header_ptr_type;
    481 }
    482 
    483 
    484 /**
    485  * Create per-context LLVM info.
    486  */
    487 struct draw_llvm *
    488 draw_llvm_create(struct draw_context *draw, LLVMContextRef context)
    489 {
    490    struct draw_llvm *llvm;
    491 
    492    if (!lp_build_init())
    493       return NULL;
    494 
    495    llvm = CALLOC_STRUCT( draw_llvm );
    496    if (!llvm)
    497       return NULL;
    498 
    499    llvm->draw = draw;
    500 
    501    llvm->context = context;
    502    if (!llvm->context) {
    503       llvm->context = LLVMContextCreate();
    504       llvm->context_owned = true;
    505    }
    506    if (!llvm->context)
    507       goto fail;
    508 
    509    llvm->nr_variants = 0;
    510    make_empty_list(&llvm->vs_variants_list);
    511 
    512    llvm->nr_gs_variants = 0;
    513    make_empty_list(&llvm->gs_variants_list);
    514 
    515    return llvm;
    516 
    517 fail:
    518    draw_llvm_destroy(llvm);
    519    return NULL;
    520 }
    521 
    522 
    523 /**
    524  * Free per-context LLVM info.
    525  */
    526 void
    527 draw_llvm_destroy(struct draw_llvm *llvm)
    528 {
    529    if (llvm->context_owned)
    530       LLVMContextDispose(llvm->context);
    531    llvm->context = NULL;
    532 
    533    /* XXX free other draw_llvm data? */
    534    FREE(llvm);
    535 }
    536 
    537 
    538 /**
    539  * Create LLVM-generated code for a vertex shader.
    540  */
    541 struct draw_llvm_variant *
    542 draw_llvm_create_variant(struct draw_llvm *llvm,
    543                          unsigned num_inputs,
    544                          const struct draw_llvm_variant_key *key)
    545 {
    546    struct draw_llvm_variant *variant;
    547    struct llvm_vertex_shader *shader =
    548       llvm_vertex_shader(llvm->draw->vs.vertex_shader);
    549    LLVMTypeRef vertex_header;
    550    char module_name[64];
    551 
    552    variant = MALLOC(sizeof *variant +
    553                     shader->variant_key_size -
    554                     sizeof variant->key);
    555    if (!variant)
    556       return NULL;
    557 
    558    variant->llvm = llvm;
    559    variant->shader = shader;
    560 
    561    util_snprintf(module_name, sizeof(module_name), "draw_llvm_vs_variant%u",
    562                  variant->shader->variants_cached);
    563 
    564    variant->gallivm = gallivm_create(module_name, llvm->context);
    565 
    566    create_jit_types(variant);
    567 
    568    memcpy(&variant->key, key, shader->variant_key_size);
    569 
    570    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
    571       tgsi_dump(llvm->draw->vs.vertex_shader->state.tokens, 0);
    572       draw_llvm_dump_variant_key(&variant->key);
    573    }
    574 
    575    vertex_header = create_jit_vertex_header(variant->gallivm, num_inputs);
    576 
    577    variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
    578 
    579    draw_llvm_generate(llvm, variant);
    580 
    581    gallivm_compile_module(variant->gallivm);
    582 
    583    variant->jit_func = (draw_jit_vert_func)
    584          gallivm_jit_function(variant->gallivm, variant->function);
    585 
    586    gallivm_free_ir(variant->gallivm);
    587 
    588    variant->list_item_global.base = variant;
    589    variant->list_item_local.base = variant;
    590    /*variant->no = */shader->variants_created++;
    591    variant->list_item_global.base = variant;
    592 
    593    return variant;
    594 }
    595 
    596 
    597 static void
    598 generate_vs(struct draw_llvm_variant *variant,
    599             LLVMBuilderRef builder,
    600             struct lp_type vs_type,
    601             LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
    602             const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
    603             const struct lp_bld_tgsi_system_values *system_values,
    604             LLVMValueRef context_ptr,
    605             struct lp_build_sampler_soa *draw_sampler,
    606             boolean clamp_vertex_color)
    607 {
    608    struct draw_llvm *llvm = variant->llvm;
    609    const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
    610    LLVMValueRef consts_ptr =
    611       draw_jit_context_vs_constants(variant->gallivm, context_ptr);
    612    LLVMValueRef num_consts_ptr =
    613       draw_jit_context_num_vs_constants(variant->gallivm, context_ptr);
    614 
    615    lp_build_tgsi_soa(variant->gallivm,
    616                      tokens,
    617                      vs_type,
    618                      NULL /*struct lp_build_mask_context *mask*/,
    619                      consts_ptr,
    620                      num_consts_ptr,
    621                      system_values,
    622                      inputs,
    623                      outputs,
    624                      context_ptr,
    625                      NULL,
    626                      draw_sampler,
    627                      &llvm->draw->vs.vertex_shader->info,
    628                      NULL);
    629 
    630    {
    631       LLVMValueRef out;
    632       unsigned chan, attrib;
    633       struct lp_build_context bld;
    634       struct tgsi_shader_info* info = &llvm->draw->vs.vertex_shader->info;
    635       lp_build_context_init(&bld, variant->gallivm, vs_type);
    636 
    637       for (attrib = 0; attrib < info->num_outputs; ++attrib) {
    638          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
    639             if (outputs[attrib][chan]) {
    640                switch (info->output_semantic_name[attrib]) {
    641                case TGSI_SEMANTIC_COLOR:
    642                case TGSI_SEMANTIC_BCOLOR:
    643                   if (clamp_vertex_color) {
    644                      out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
    645                      out = lp_build_clamp(&bld, out, bld.zero, bld.one);
    646                      LLVMBuildStore(builder, out, outputs[attrib][chan]);
    647                   }
    648                   break;
    649                }
    650             }
    651          }
    652       }
    653    }
    654 }
    655 
    656 
    657 static void
    658 fetch_instanced(struct gallivm_state *gallivm,
    659                 const struct util_format_description *format_desc,
    660                 struct lp_type vs_type,
    661                 LLVMValueRef vb_stride,
    662                 LLVMValueRef map_ptr,
    663                 LLVMValueRef buffer_size_adj,
    664                 LLVMValueRef *inputs,
    665                 LLVMValueRef index)
    666 {
    667    LLVMTypeRef i32_t = LLVMInt32TypeInContext(gallivm->context);
    668    LLVMTypeRef aosf_t, aosi_t;
    669    LLVMValueRef zero = LLVMConstNull(i32_t);
    670    LLVMBuilderRef builder = gallivm->builder;
    671    LLVMValueRef stride, buffer_overflowed, aos, index_valid;
    672    unsigned i;
    673 
    674    aosf_t = lp_build_vec_type(gallivm, lp_float32_vec4_type());
    675    aosi_t = lp_build_vec_type(gallivm, lp_int32_vec4_type());
    676 
    677    /* This mul can overflow. Wraparound is ok. */
    678    stride = LLVMBuildMul(builder, vb_stride, index, "");
    679 
    680    buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
    681                                      stride, buffer_size_adj,
    682                                      "buffer_overflowed");
    683 
    684    if (0) {
    685       lp_build_print_value(gallivm, "   instance index = ", index);
    686       lp_build_print_value(gallivm, "   buffer overflowed = ", buffer_overflowed);
    687    }
    688 
    689    index_valid = LLVMBuildNot(builder, buffer_overflowed, "");
    690    index_valid = LLVMBuildSExt(builder, index_valid, i32_t, "");
    691    stride = LLVMBuildAnd(builder, stride, index_valid, "");
    692 
    693    aos = lp_build_fetch_rgba_aos(gallivm,
    694                                  format_desc,
    695                                  lp_float32_vec4_type(),
    696                                  FALSE,
    697                                  map_ptr,
    698                                  stride, zero, zero,
    699                                  NULL);
    700 
    701    index_valid = lp_build_broadcast(gallivm, aosi_t, index_valid);
    702    aos = LLVMBuildBitCast(builder, aos, aosi_t, "");
    703    aos = LLVMBuildAnd(builder, aos, index_valid, "");
    704    aos = LLVMBuildBitCast(builder, aos, aosf_t, "");
    705 
    706    for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
    707       LLVMValueRef index = lp_build_const_int32(gallivm, i);
    708       inputs[i] = lp_build_extract_broadcast(gallivm,
    709                                              lp_float32_vec4_type(),
    710                                              vs_type, aos, index);
    711    }
    712 }
    713 
    714 
    715 static void
    716 fetch_vector(struct gallivm_state *gallivm,
    717              const struct util_format_description *format_desc,
    718              struct lp_type vs_type,
    719              LLVMValueRef vb_stride,
    720              LLVMValueRef map_ptr,
    721              LLVMValueRef buffer_size_adj,
    722              LLVMValueRef *inputs,
    723              LLVMValueRef indices)
    724 {
    725    LLVMBuilderRef builder = gallivm->builder;
    726    struct lp_build_context blduivec;
    727    struct lp_type fetch_type = vs_type;
    728    LLVMValueRef offset, valid_mask;
    729    unsigned i;
    730 
    731    lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
    732 
    733    vb_stride = lp_build_broadcast_scalar(&blduivec, vb_stride);
    734    buffer_size_adj = lp_build_broadcast_scalar(&blduivec, buffer_size_adj);
    735 
    736    /* This mul can overflow. Wraparound is ok. */
    737    offset = lp_build_mul(&blduivec, vb_stride, indices);
    738 
    739    valid_mask = lp_build_compare(gallivm, blduivec.type,
    740                                  PIPE_FUNC_LESS, offset, buffer_size_adj);
    741 
    742    /* not valid elements use offset 0 */
    743    offset = LLVMBuildAnd(builder, offset, valid_mask, "");
    744 
    745    if (0) {
    746       lp_build_print_value(gallivm, "   indices = ", indices);
    747       lp_build_print_value(gallivm, "   offsets = ", offset);
    748       lp_build_print_value(gallivm, "   valid_mask = ", valid_mask);
    749    }
    750 
    751    /*
    752     * Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches.
    753     * This should always produce better code.
    754     */
    755 
    756    /* The type handling is annoying here... */
    757    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
    758        format_desc->channel[0].pure_integer) {
    759       if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
    760          fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length);
    761       }
    762       else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
    763          fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length);
    764       }
    765    }
    766 
    767    lp_build_fetch_rgba_soa(gallivm, format_desc,
    768                            fetch_type, FALSE, map_ptr, offset,
    769                            blduivec.zero, blduivec.zero,
    770                            NULL, inputs);
    771 
    772    for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
    773       inputs[i] = LLVMBuildBitCast(builder, inputs[i],
    774                                    lp_build_vec_type(gallivm, vs_type), "");
    775    }
    776 
    777    /* out-of-bound fetches return all zeros */
    778    for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
    779       inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, "");
    780       inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, "");
    781       inputs[i] = LLVMBuildBitCast(builder, inputs[i],
    782                                    lp_build_vec_type(gallivm, vs_type), "");
    783    }
    784 }
    785 
    786 
    787 static void
    788 store_aos(struct gallivm_state *gallivm,
    789           LLVMValueRef io_ptr,
    790           LLVMValueRef index,
    791           LLVMValueRef value)
    792 {
    793    LLVMTypeRef data_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, lp_float32_vec4_type()), 0);
    794    LLVMBuilderRef builder = gallivm->builder;
    795    LLVMValueRef data_ptr = draw_jit_header_data(gallivm, io_ptr);
    796    LLVMValueRef indices[3];
    797 
    798    indices[0] = lp_build_const_int32(gallivm, 0);
    799    indices[1] = index;
    800    indices[2] = lp_build_const_int32(gallivm, 0);
    801 
    802    data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
    803    data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, "");
    804 
    805 #if DEBUG_STORE
    806    lp_build_printf(gallivm, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
    807 #endif
    808 
    809    /* Unaligned store due to the vertex header */
    810    LLVMSetAlignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float));
    811 }
    812 
    813 /**
    814  * Adjust the mask to architecture endianess. The mask will the store in struct:
    815  *
    816  * struct vertex_header {
    817  *    unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
    818  *    unsigned edgeflag:1;
    819  *    unsigned pad:1;
    820  *    unsigned vertex_id:16;
    821  *    [...]
    822  * }
    823  *
    824  * On little-endian machine nothing needs to done, however on bit-endian machine
    825  * the mask's fields need to be adjusted with the algorithm:
    826  *
    827  * uint32_t reverse (uint32_t x)
    828  * {
    829  *   return (x >> 16) |              // vertex_id
    830  *          ((x & 0x3fff) << 18) |   // clipmask
    831  *          ((x & 0x4000) << 3) |    // pad
    832  *          ((x & 0x8000) << 1);     // edgeflag
    833  * }
    834  */
    835 static LLVMValueRef
    836 adjust_mask(struct gallivm_state *gallivm,
    837             LLVMValueRef mask)
    838 {
    839 #ifdef PIPE_ARCH_BIG_ENDIAN
    840    LLVMBuilderRef builder = gallivm->builder;
    841    LLVMValueRef vertex_id;
    842    LLVMValueRef clipmask;
    843    LLVMValueRef pad;
    844    LLVMValueRef edgeflag;
    845 
    846    vertex_id = LLVMBuildLShr(builder, mask, lp_build_const_int32(gallivm, 16), "");
    847    clipmask  = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x3fff), "");
    848    clipmask  = LLVMBuildShl(builder, clipmask, lp_build_const_int32(gallivm, 18), "");
    849    if (0) {
    850       pad = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
    851       pad = LLVMBuildShl(builder, pad, lp_build_const_int32(gallivm, 3), "");
    852    }
    853    edgeflag = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x8000), "");
    854    edgeflag = LLVMBuildShl(builder, edgeflag, lp_build_const_int32(gallivm, 1), "");
    855 
    856    mask = LLVMBuildOr(builder, vertex_id, clipmask, "");
    857    if (0) {
    858       mask = LLVMBuildOr(builder, mask, pad, "");
    859    }
    860    mask = LLVMBuildOr(builder, mask, edgeflag, "");
    861 #endif
    862    return mask;
    863 }
    864 
    865 static void
    866 store_aos_array(struct gallivm_state *gallivm,
    867                 struct lp_type soa_type,
    868                 LLVMValueRef io_ptr,
    869                 LLVMValueRef *indices,
    870                 LLVMValueRef* aos,
    871                 int attrib,
    872                 int num_outputs,
    873                 LLVMValueRef clipmask,
    874                 boolean need_edgeflag)
    875 {
    876    LLVMBuilderRef builder = gallivm->builder;
    877    LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
    878    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
    879    LLVMValueRef linear_inds[LP_MAX_VECTOR_WIDTH / 32];
    880    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
    881    int vector_length = soa_type.length;
    882    int i;
    883 
    884    debug_assert(TGSI_NUM_CHANNELS == 4);
    885 
    886    for (i = 0; i < vector_length; i++) {
    887       linear_inds[i] = lp_build_const_int32(gallivm, i);
    888       if (indices) {
    889          inds[i] = indices[i];
    890       } else {
    891          inds[i] = linear_inds[i];
    892       }
    893       io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
    894    }
    895 
    896    if (attrib == 0) {
    897       /* store vertex header for each of the n vertices */
    898       LLVMValueRef val, cliptmp;
    899       int vertex_id_pad_edgeflag;
    900 
    901       /* If this assertion fails, it means we need to update the bit twidding
    902        * code here.  See struct vertex_header in draw_private.h.
    903        */
    904       assert(DRAW_TOTAL_CLIP_PLANES==14);
    905       /* initialize vertex id:16 = 0xffff, pad:1 = 0, edgeflag:1 = 1 */
    906       if (!need_edgeflag) {
    907          vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
    908       }
    909       else {
    910          vertex_id_pad_edgeflag = (0xffff << 16);
    911       }
    912       val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type),
    913                                    vertex_id_pad_edgeflag);
    914       /* OR with the clipmask */
    915       cliptmp = LLVMBuildOr(builder, val, clipmask, "");
    916       for (i = 0; i < vector_length; i++) {
    917          LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_ptrs[i]);
    918          val = LLVMBuildExtractElement(builder, cliptmp, linear_inds[i], "");
    919          val = adjust_mask(gallivm, val);
    920 #if DEBUG_STORE
    921          lp_build_printf(gallivm, "io = %p, index %d, clipmask = %x\n",
    922                          io_ptrs[i], inds[i], val);
    923 #endif
    924          LLVMBuildStore(builder, val, id_ptr);
    925       }
    926    }
    927 
    928    /* store for each of the n vertices */
    929    for (i = 0; i < vector_length; i++) {
    930       store_aos(gallivm, io_ptrs[i], attr_index, aos[i]);
    931    }
    932 }
    933 
    934 
    935 static void
    936 convert_to_aos(struct gallivm_state *gallivm,
    937                LLVMValueRef io,
    938                LLVMValueRef *indices,
    939                LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
    940                LLVMValueRef clipmask,
    941                int num_outputs,
    942                struct lp_type soa_type,
    943                boolean need_edgeflag)
    944 {
    945    LLVMBuilderRef builder = gallivm->builder;
    946    unsigned chan, attrib, i;
    947 
    948 #if DEBUG_STORE
    949    lp_build_printf(gallivm, "   # storing begin\n");
    950 #endif
    951    for (attrib = 0; attrib < num_outputs; ++attrib) {
    952       LLVMValueRef soa[TGSI_NUM_CHANNELS];
    953       LLVMValueRef aos[LP_MAX_VECTOR_WIDTH / 32];
    954       for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
    955          if (outputs[attrib][chan]) {
    956             LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
    957             lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
    958 #if DEBUG_STORE
    959             lp_build_printf(gallivm, "output %d : %d ",
    960                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
    961                                          attrib, 0),
    962                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
    963                                          chan, 0));
    964             lp_build_print_value(gallivm, "val = ", out);
    965             {
    966                LLVMValueRef iv =
    967                   LLVMBuildBitCast(builder, out, lp_build_int_vec_type(gallivm, soa_type), "");
    968 
    969                lp_build_print_value(gallivm, "  ival = ", iv);
    970             }
    971 #endif
    972             soa[chan] = out;
    973          }
    974          else {
    975             soa[chan] = 0;
    976          }
    977       }
    978 
    979 
    980       if (soa_type.length == TGSI_NUM_CHANNELS) {
    981          lp_build_transpose_aos(gallivm, soa_type, soa, aos);
    982       } else {
    983          lp_build_transpose_aos(gallivm, soa_type, soa, soa);
    984 
    985          for (i = 0; i < soa_type.length; ++i) {
    986             aos[i] = lp_build_extract_range(gallivm,
    987                                             soa[i % TGSI_NUM_CHANNELS],
    988                                             (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
    989                                             TGSI_NUM_CHANNELS);
    990          }
    991       }
    992 
    993       store_aos_array(gallivm,
    994                       soa_type,
    995                       io, indices,
    996                       aos,
    997                       attrib,
    998                       num_outputs,
    999                       clipmask,
   1000                       need_edgeflag);
   1001    }
   1002 #if DEBUG_STORE
   1003    lp_build_printf(gallivm, "   # storing end\n");
   1004 #endif
   1005 }
   1006 
   1007 
   1008 /**
   1009  * Stores original vertex positions in clip coordinates
   1010  */
   1011 static void
   1012 store_clip(struct gallivm_state *gallivm,
   1013            const struct lp_type vs_type,
   1014            LLVMValueRef io_ptr,
   1015            LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
   1016            int idx)
   1017 {
   1018    LLVMBuilderRef builder = gallivm->builder;
   1019    LLVMValueRef soa[4];
   1020    LLVMValueRef aos[LP_MAX_VECTOR_LENGTH];
   1021    LLVMValueRef indices[2];
   1022    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
   1023    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
   1024    LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32];
   1025    LLVMTypeRef clip_ptr_type =
   1026       LLVMPointerType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context),
   1027                                      4), 0);
   1028    int i, j;
   1029 
   1030    indices[0] =
   1031    indices[1] = lp_build_const_int32(gallivm, 0);
   1032 
   1033    for (i = 0; i < vs_type.length; i++) {
   1034       inds[i] = lp_build_const_int32(gallivm, i);
   1035       io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
   1036    }
   1037 
   1038    soa[0] = LLVMBuildLoad(builder, outputs[idx][0], ""); /*x0 x1 .. xn*/
   1039    soa[1] = LLVMBuildLoad(builder, outputs[idx][1], ""); /*y0 y1 .. yn*/
   1040    soa[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 .. zn*/
   1041    soa[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 .. wn*/
   1042 
   1043    for (i = 0; i < vs_type.length; i++) {
   1044       clip_ptrs[i] = draw_jit_header_clip_pos(gallivm, io_ptrs[i]);
   1045    }
   1046 
   1047    lp_build_transpose_aos(gallivm, vs_type, soa, soa);
   1048    for (i = 0; i < vs_type.length; ++i) {
   1049       aos[i] = lp_build_extract_range(gallivm,
   1050                                       soa[i % TGSI_NUM_CHANNELS],
   1051                                       (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
   1052                                       TGSI_NUM_CHANNELS);
   1053    }
   1054 
   1055    for (j = 0; j < vs_type.length; j++) {
   1056       LLVMValueRef clip_ptr;
   1057 
   1058       clip_ptr = LLVMBuildGEP(builder, clip_ptrs[j], indices, 2, "clipo");
   1059       clip_ptr = LLVMBuildPointerCast(builder, clip_ptr, clip_ptr_type, "");
   1060 
   1061       /* Unaligned store */
   1062       LLVMSetAlignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));
   1063    }
   1064 }
   1065 
   1066 
   1067 /**
   1068  * Transforms the outputs for viewport mapping
   1069  */
   1070 static void
   1071 generate_viewport(struct draw_llvm_variant *variant,
   1072                   LLVMBuilderRef builder,
   1073                   struct lp_type vs_type,
   1074                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
   1075                   LLVMValueRef context_ptr)
   1076 {
   1077    int i;
   1078    struct gallivm_state *gallivm = variant->gallivm;
   1079    struct lp_type f32_type = vs_type;
   1080    const unsigned pos = variant->llvm->draw->vs.position_output;
   1081    LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
   1082    LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn*/
   1083    LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
   1084    LLVMValueRef vp_ptr = draw_jit_context_viewports(gallivm, context_ptr);
   1085 
   1086    /* We treat pipe_viewport_state as a float array */
   1087    const int scale_index_offset = offsetof(struct pipe_viewport_state, scale) / sizeof(float);
   1088    const int trans_index_offset = offsetof(struct pipe_viewport_state, translate) / sizeof(float);
   1089 
   1090    /* for 1/w convention*/
   1091    out3 = LLVMBuildFDiv(builder, const1, out3, "");
   1092    LLVMBuildStore(builder, out3, outputs[pos][3]);
   1093 
   1094    /* Viewport Mapping */
   1095    for (i=0; i<3; i++) {
   1096       LLVMValueRef out = LLVMBuildLoad(builder, outputs[pos][i], ""); /*x0 x1 .. xn*/
   1097       LLVMValueRef scale;
   1098       LLVMValueRef trans;
   1099       LLVMValueRef scale_i;
   1100       LLVMValueRef trans_i;
   1101       LLVMValueRef index;
   1102 
   1103       index = lp_build_const_int32(gallivm, i + scale_index_offset);
   1104       scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
   1105 
   1106       index = lp_build_const_int32(gallivm, i + trans_index_offset);
   1107       trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
   1108 
   1109       scale = lp_build_broadcast(gallivm, vs_type_llvm,
   1110                                  LLVMBuildLoad(builder, scale_i, "scale"));
   1111       trans = lp_build_broadcast(gallivm, vs_type_llvm,
   1112                                  LLVMBuildLoad(builder, trans_i, "trans"));
   1113 
   1114       /* divide by w */
   1115       out = LLVMBuildFMul(builder, out, out3, "");
   1116       /* mult by scale, add translation */
   1117       out = lp_build_fmuladd(builder, out, scale, trans);
   1118 
   1119       /* store transformed outputs */
   1120       LLVMBuildStore(builder, out, outputs[pos][i]);
   1121    }
   1122 
   1123 }
   1124 
   1125 
   1126 /**
   1127  * Returns clipmask as nxi32 bitmask for the n vertices
   1128  */
   1129 static LLVMValueRef
   1130 generate_clipmask(struct draw_llvm *llvm,
   1131                   struct gallivm_state *gallivm,
   1132                   struct lp_type vs_type,
   1133                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
   1134                   struct draw_llvm_variant_key *key,
   1135                   LLVMValueRef context_ptr,
   1136                   boolean *have_clipdist)
   1137 {
   1138    LLVMBuilderRef builder = gallivm->builder;
   1139    LLVMValueRef mask; /* stores the <nxi32> clipmasks */
   1140    LLVMValueRef test, temp;
   1141    LLVMValueRef zero, shift;
   1142    LLVMValueRef pos_x, pos_y, pos_z, pos_w;
   1143    LLVMValueRef cv_x, cv_y, cv_z, cv_w;
   1144    LLVMValueRef plane1, planes, plane_ptr, sum;
   1145    struct lp_type f32_type = vs_type;
   1146    struct lp_type i32_type = lp_int_type(vs_type);
   1147    const unsigned pos = llvm->draw->vs.position_output;
   1148    const unsigned cv = llvm->draw->vs.clipvertex_output;
   1149    int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
   1150    boolean have_cd = false;
   1151    boolean clip_user = key->clip_user;
   1152    unsigned ucp_enable = key->ucp_enable;
   1153    unsigned cd[2];
   1154 
   1155    cd[0] = llvm->draw->vs.ccdistance_output[0];
   1156    cd[1] = llvm->draw->vs.ccdistance_output[1];
   1157 
   1158    if (cd[0] != pos || cd[1] != pos)
   1159       have_cd = true;
   1160 
   1161    if (num_written_clipdistance && !clip_user) {
   1162       clip_user = true;
   1163       ucp_enable = (1 << num_written_clipdistance) - 1;
   1164    }
   1165 
   1166    mask = lp_build_const_int_vec(gallivm, i32_type, 0);
   1167    temp = lp_build_const_int_vec(gallivm, i32_type, 0);
   1168    zero = lp_build_const_vec(gallivm, f32_type, 0);         /* 0.0f 0.0f 0.0f 0.0f */
   1169    shift = lp_build_const_int_vec(gallivm, i32_type, 1);    /* 1 1 1 1 */
   1170 
   1171    /*
   1172     * load clipvertex and position from correct locations.
   1173     * if they are the same just load them once.
   1174     */
   1175    pos_x = LLVMBuildLoad(builder, outputs[pos][0], ""); /*x0 x1 .. xn */
   1176    pos_y = LLVMBuildLoad(builder, outputs[pos][1], ""); /*y0 y1 .. yn */
   1177    pos_z = LLVMBuildLoad(builder, outputs[pos][2], ""); /*z0 z1 .. zn */
   1178    pos_w = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn */
   1179 
   1180    if (clip_user && cv != pos) {
   1181       cv_x = LLVMBuildLoad(builder, outputs[cv][0], ""); /*x0 x1 .. xn */
   1182       cv_y = LLVMBuildLoad(builder, outputs[cv][1], ""); /*y0 y1 .. yn */
   1183       cv_z = LLVMBuildLoad(builder, outputs[cv][2], ""); /*z0 z1 .. zn */
   1184       cv_w = LLVMBuildLoad(builder, outputs[cv][3], ""); /*w0 w1 .. wn */
   1185    } else {
   1186       cv_x = pos_x;
   1187       cv_y = pos_y;
   1188       cv_z = pos_z;
   1189       cv_w = pos_w;
   1190    }
   1191 
   1192    /*
   1193     * Be careful with the comparisons and NaNs (using llvm's unordered
   1194     * comparisons here).
   1195     */
   1196    /* Cliptest, for hardwired planes */
   1197    /*
   1198     * XXX should take guardband into account (currently not in key).
   1199     * Otherwise might run the draw pipeline stages for nothing.
   1200     */
   1201    if (key->clip_xy) {
   1202       /* plane 1 */
   1203       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
   1204       temp = shift;
   1205       test = LLVMBuildAnd(builder, test, temp, "");
   1206       mask = test;
   1207 
   1208       /* plane 2 */
   1209       test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
   1210       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
   1211       temp = LLVMBuildShl(builder, temp, shift, "");
   1212       test = LLVMBuildAnd(builder, test, temp, "");
   1213       mask = LLVMBuildOr(builder, mask, test, "");
   1214 
   1215       /* plane 3 */
   1216       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
   1217       temp = LLVMBuildShl(builder, temp, shift, "");
   1218       test = LLVMBuildAnd(builder, test, temp, "");
   1219       mask = LLVMBuildOr(builder, mask, test, "");
   1220 
   1221       /* plane 4 */
   1222       test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
   1223       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
   1224       temp = LLVMBuildShl(builder, temp, shift, "");
   1225       test = LLVMBuildAnd(builder, test, temp, "");
   1226       mask = LLVMBuildOr(builder, mask, test, "");
   1227    }
   1228 
   1229    if (key->clip_z) {
   1230       temp = lp_build_const_int_vec(gallivm, i32_type, 16);
   1231       if (key->clip_halfz) {
   1232          /* plane 5 */
   1233          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
   1234          test = LLVMBuildAnd(builder, test, temp, "");
   1235          mask = LLVMBuildOr(builder, mask, test, "");
   1236       }
   1237       else {
   1238          /* plane 5 */
   1239          test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
   1240          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
   1241          test = LLVMBuildAnd(builder, test, temp, "");
   1242          mask = LLVMBuildOr(builder, mask, test, "");
   1243       }
   1244       /* plane 6 */
   1245       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
   1246       temp = LLVMBuildShl(builder, temp, shift, "");
   1247       test = LLVMBuildAnd(builder, test, temp, "");
   1248       mask = LLVMBuildOr(builder, mask, test, "");
   1249    }
   1250 
   1251    if (clip_user) {
   1252       LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr);
   1253       LLVMValueRef indices[3];
   1254       LLVMValueRef is_nan_or_inf;
   1255 
   1256       /* userclip planes */
   1257       while (ucp_enable) {
   1258          unsigned plane_idx = ffs(ucp_enable)-1;
   1259          ucp_enable &= ~(1 << plane_idx);
   1260          plane_idx += 6;
   1261 
   1262          if (have_cd && num_written_clipdistance) {
   1263             LLVMValueRef clipdist;
   1264             int i;
   1265             i = plane_idx - 6;
   1266 
   1267             *have_clipdist = TRUE;
   1268             if (i < 4) {
   1269                clipdist = LLVMBuildLoad(builder, outputs[cd[0]][i], "");
   1270             } else {
   1271                clipdist = LLVMBuildLoad(builder, outputs[cd[1]][i-4], "");
   1272             }
   1273             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist);
   1274             is_nan_or_inf = lp_build_is_inf_or_nan(gallivm, vs_type, clipdist);
   1275             test = LLVMBuildOr(builder, test, is_nan_or_inf, "");
   1276             temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
   1277             test = LLVMBuildAnd(builder, test, temp, "");
   1278             mask = LLVMBuildOr(builder, mask, test, "");
   1279          } else {
   1280             LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
   1281             indices[0] = lp_build_const_int32(gallivm, 0);
   1282             indices[1] = lp_build_const_int32(gallivm, plane_idx);
   1283 
   1284             indices[2] = lp_build_const_int32(gallivm, 0);
   1285             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
   1286             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");
   1287             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
   1288             sum = LLVMBuildFMul(builder, planes, cv_x, "");
   1289 
   1290             indices[2] = lp_build_const_int32(gallivm, 1);
   1291             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
   1292             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y");
   1293             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
   1294             sum = lp_build_fmuladd(builder, planes, cv_y, sum);
   1295 
   1296             indices[2] = lp_build_const_int32(gallivm, 2);
   1297             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
   1298             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z");
   1299             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
   1300             sum = lp_build_fmuladd(builder, planes, cv_z, sum);
   1301 
   1302             indices[2] = lp_build_const_int32(gallivm, 3);
   1303             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
   1304             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w");
   1305             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
   1306             sum = lp_build_fmuladd(builder, planes, cv_w, sum);
   1307 
   1308             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
   1309             temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
   1310             test = LLVMBuildAnd(builder, test, temp, "");
   1311             mask = LLVMBuildOr(builder, mask, test, "");
   1312          }
   1313       }
   1314    }
   1315    if (key->need_edgeflags) {
   1316       /*
   1317        * This isn't really part of clipmask but stored the same in vertex
   1318        * header later, so do it here.
   1319        */
   1320       unsigned edge_attr = llvm->draw->vs.edgeflag_output;
   1321       LLVMValueRef one = lp_build_const_vec(gallivm, f32_type, 1.0);
   1322       LLVMValueRef edgeflag = LLVMBuildLoad(builder, outputs[edge_attr][0], "");
   1323       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_EQUAL, one, edgeflag);
   1324       temp = lp_build_const_int_vec(gallivm, i32_type,
   1325                                     1LL << DRAW_TOTAL_CLIP_PLANES);
   1326       test = LLVMBuildAnd(builder, test, temp, "");
   1327       mask = LLVMBuildOr(builder, mask, test, "");
   1328    }
   1329    return mask;
   1330 }
   1331 
   1332 
   1333 /**
   1334  * Returns boolean if any clipping has occurred
   1335  * Used zero/one i8 value to represent boolean
   1336  */
   1337 static LLVMValueRef
   1338 clipmask_booli8(struct gallivm_state *gallivm,
   1339                 const struct lp_type vs_type,
   1340                 LLVMValueRef clipmask_bool_ptr,
   1341                 boolean edgeflag_in_clipmask)
   1342 {
   1343    LLVMBuilderRef builder = gallivm->builder;
   1344    LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
   1345    LLVMValueRef clipmask_bool = LLVMBuildLoad(builder, clipmask_bool_ptr, "");
   1346    LLVMValueRef ret;
   1347    struct lp_build_context bldivec;
   1348 
   1349    lp_build_context_init(&bldivec, gallivm, lp_int_type(vs_type));
   1350 
   1351    /*
   1352     * We need to invert the edgeflag bit from the clipmask here
   1353     * (because the result is really if we want to run the pipeline or not
   1354     * and we (may) need it if edgeflag was 0).
   1355     */
   1356    if (edgeflag_in_clipmask) {
   1357       LLVMValueRef edge = lp_build_const_int_vec(gallivm, bldivec.type,
   1358                                                  1LL << DRAW_TOTAL_CLIP_PLANES);
   1359       clipmask_bool = LLVMBuildXor(builder, clipmask_bool, edge, "");
   1360    }
   1361 
   1362    /*
   1363     * XXX: probably should mask off bits from the mask which come from
   1364     * vertices which were beyond the count (i.e. indices_valid for
   1365     * linear fetches, for elts ones we don't have the correct mask
   1366     * right now). Otherwise might run the pipeline for nothing,
   1367     * though everything should still work.
   1368     */
   1369    ret = lp_build_any_true_range(&bldivec, vs_type.length, clipmask_bool);
   1370    ret = LLVMBuildZExt(builder, ret, int8_type, "");
   1371    return ret;
   1372 }
   1373 
   1374 static LLVMValueRef
   1375 draw_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface,
   1376                          struct lp_build_tgsi_context * bld_base,
   1377                          boolean is_vindex_indirect,
   1378                          LLVMValueRef vertex_index,
   1379                          boolean is_aindex_indirect,
   1380                          LLVMValueRef attrib_index,
   1381                          LLVMValueRef swizzle_index)
   1382 {
   1383    const struct draw_gs_llvm_iface *gs = draw_gs_llvm_iface(gs_iface);
   1384    struct gallivm_state *gallivm = bld_base->base.gallivm;
   1385    LLVMBuilderRef builder = gallivm->builder;
   1386    LLVMValueRef indices[3];
   1387    LLVMValueRef res;
   1388    struct lp_type type = bld_base->base.type;
   1389 
   1390    if (is_vindex_indirect || is_aindex_indirect) {
   1391       int i;
   1392       res = bld_base->base.zero;
   1393       for (i = 0; i < type.length; ++i) {
   1394          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
   1395          LLVMValueRef vert_chan_index = vertex_index;
   1396          LLVMValueRef attr_chan_index = attrib_index;
   1397          LLVMValueRef channel_vec, value;
   1398 
   1399          if (is_vindex_indirect) {
   1400             vert_chan_index = LLVMBuildExtractElement(builder,
   1401                                                       vertex_index, idx, "");
   1402          }
   1403          if (is_aindex_indirect) {
   1404             attr_chan_index = LLVMBuildExtractElement(builder,
   1405                                                       attrib_index, idx, "");
   1406          }
   1407 
   1408          indices[0] = vert_chan_index;
   1409          indices[1] = attr_chan_index;
   1410          indices[2] = swizzle_index;
   1411 
   1412          channel_vec = LLVMBuildGEP(builder, gs->input, indices, 3, "");
   1413          channel_vec = LLVMBuildLoad(builder, channel_vec, "");
   1414          value = LLVMBuildExtractElement(builder, channel_vec, idx, "");
   1415 
   1416          res = LLVMBuildInsertElement(builder, res, value, idx, "");
   1417       }
   1418    } else {
   1419       indices[0] = vertex_index;
   1420       indices[1] = attrib_index;
   1421       indices[2] = swizzle_index;
   1422 
   1423       res = LLVMBuildGEP(builder, gs->input, indices, 3, "");
   1424       res = LLVMBuildLoad(builder, res, "");
   1425    }
   1426 
   1427    return res;
   1428 }
   1429 
   1430 static void
   1431 draw_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
   1432                          struct lp_build_tgsi_context * bld_base,
   1433                          LLVMValueRef (*outputs)[4],
   1434                          LLVMValueRef emitted_vertices_vec)
   1435 {
   1436    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
   1437    struct draw_gs_llvm_variant *variant = gs_iface->variant;
   1438    struct gallivm_state *gallivm = variant->gallivm;
   1439    LLVMBuilderRef builder = gallivm->builder;
   1440    struct lp_type gs_type = bld_base->base.type;
   1441    LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
   1442                                                   lp_int_type(gs_type), 0);
   1443    LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
   1444    LLVMValueRef next_prim_offset =
   1445       lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary);
   1446    LLVMValueRef io = variant->io_ptr;
   1447    unsigned i;
   1448    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
   1449 
   1450    for (i = 0; i < gs_type.length; ++i) {
   1451       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
   1452       LLVMValueRef currently_emitted =
   1453          LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
   1454       indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
   1455       indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
   1456    }
   1457 
   1458    convert_to_aos(gallivm, io, indices,
   1459                   outputs, clipmask,
   1460                   gs_info->num_outputs, gs_type,
   1461                   FALSE);
   1462 }
   1463 
   1464 static void
   1465 draw_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base,
   1466                            struct lp_build_tgsi_context * bld_base,
   1467                            LLVMValueRef verts_per_prim_vec,
   1468                            LLVMValueRef emitted_prims_vec)
   1469 {
   1470    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
   1471    struct draw_gs_llvm_variant *variant = gs_iface->variant;
   1472    struct gallivm_state *gallivm = variant->gallivm;
   1473    LLVMBuilderRef builder = gallivm->builder;
   1474    LLVMValueRef prim_lengts_ptr =
   1475       draw_gs_jit_prim_lengths(variant->gallivm, variant->context_ptr);
   1476    unsigned i;
   1477 
   1478    for (i = 0; i < bld_base->base.type.length; ++i) {
   1479       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
   1480       LLVMValueRef prims_emitted =
   1481          LLVMBuildExtractElement(builder, emitted_prims_vec, ind, "");
   1482       LLVMValueRef store_ptr;
   1483       LLVMValueRef num_vertices =
   1484          LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, "");
   1485 
   1486       store_ptr = LLVMBuildGEP(builder, prim_lengts_ptr, &prims_emitted, 1, "");
   1487       store_ptr = LLVMBuildLoad(builder, store_ptr, "");
   1488       store_ptr = LLVMBuildGEP(builder, store_ptr, &ind, 1, "");
   1489       LLVMBuildStore(builder, num_vertices, store_ptr);
   1490    }
   1491 }
   1492 
   1493 static void
   1494 draw_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base,
   1495                       struct lp_build_tgsi_context * bld_base,
   1496                       LLVMValueRef total_emitted_vertices_vec,
   1497                       LLVMValueRef emitted_prims_vec)
   1498 {
   1499    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
   1500    struct draw_gs_llvm_variant *variant = gs_iface->variant;
   1501    struct gallivm_state *gallivm = variant->gallivm;
   1502    LLVMBuilderRef builder = gallivm->builder;
   1503    LLVMValueRef emitted_verts_ptr =
   1504       draw_gs_jit_emitted_vertices(gallivm, variant->context_ptr);
   1505    LLVMValueRef emitted_prims_ptr =
   1506       draw_gs_jit_emitted_prims(gallivm, variant->context_ptr);
   1507    LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
   1508 
   1509    emitted_verts_ptr = LLVMBuildGEP(builder, emitted_verts_ptr, &zero, 0, "");
   1510    emitted_prims_ptr = LLVMBuildGEP(builder, emitted_prims_ptr, &zero, 0, "");
   1511 
   1512    LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr);
   1513    LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr);
   1514 }
   1515 
   1516 static void
   1517 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
   1518 {
   1519    struct gallivm_state *gallivm = variant->gallivm;
   1520    LLVMContextRef context = gallivm->context;
   1521    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
   1522    LLVMTypeRef arg_types[11];
   1523    unsigned num_arg_types = ARRAY_SIZE(arg_types);
   1524    LLVMTypeRef func_type;
   1525    LLVMValueRef context_ptr;
   1526    LLVMBasicBlockRef block;
   1527    LLVMBuilderRef builder;
   1528    char func_name[64];
   1529    struct lp_type vs_type;
   1530    LLVMValueRef count, fetch_elts, start_or_maxelt;
   1531    LLVMValueRef vertex_id_offset, start_instance;
   1532    LLVMValueRef stride, step, io_itr;
   1533    LLVMValueRef ind_vec, start_vec, have_elts, fetch_max, tmp;
   1534    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
   1535    LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS];
   1536    LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS];
   1537    LLVMValueRef buffer_size_adj[PIPE_MAX_ATTRIBS];
   1538    LLVMValueRef instance_index[PIPE_MAX_ATTRIBS];
   1539    LLVMValueRef fake_buf_ptr, fake_buf;
   1540 
   1541    struct draw_context *draw = llvm->draw;
   1542    const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
   1543    unsigned i, j;
   1544    struct lp_build_context bld, blduivec;
   1545    struct lp_build_loop_state lp_loop;
   1546    struct lp_build_if_state if_ctx;
   1547    const int vector_length = lp_native_vector_width / 32;
   1548    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
   1549    struct lp_build_sampler_soa *sampler = 0;
   1550    LLVMValueRef ret, clipmask_bool_ptr;
   1551    struct draw_llvm_variant_key *key = &variant->key;
   1552    /* If geometry shader is present we need to skip both the viewport
   1553     * transformation and clipping otherwise the inputs to the geometry
   1554     * shader will be incorrect.
   1555     * The code can't handle vp transform when vs writes vp index neither
   1556     * (though this would be fixable here, but couldn't just broadcast
   1557     * the values).
   1558     */
   1559    const boolean bypass_viewport = key->has_gs || key->bypass_viewport ||
   1560                                    vs_info->writes_viewport_index;
   1561    const boolean enable_cliptest = !key->has_gs && (key->clip_xy ||
   1562                                                     key->clip_z ||
   1563                                                     key->clip_user ||
   1564                                                     key->need_edgeflags);
   1565    LLVMValueRef variant_func;
   1566    const unsigned pos = draw->vs.position_output;
   1567    const unsigned cv = draw->vs.clipvertex_output;
   1568    boolean have_clipdist = FALSE;
   1569    struct lp_bld_tgsi_system_values system_values;
   1570 
   1571    memset(&system_values, 0, sizeof(system_values));
   1572 
   1573    util_snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant%u",
   1574                  variant->shader->variants_cached);
   1575 
   1576    i = 0;
   1577    arg_types[i++] = get_context_ptr_type(variant);       /* context */
   1578    arg_types[i++] = get_vertex_header_ptr_type(variant); /* vertex_header */
   1579    arg_types[i++] = get_buffer_ptr_type(variant);        /* vbuffers */
   1580    arg_types[i++] = int32_type;                          /* count */
   1581    arg_types[i++] = int32_type;                          /* start/fetch_elt_max */
   1582    arg_types[i++] = int32_type;                          /* stride */
   1583    arg_types[i++] = get_vb_ptr_type(variant);            /* pipe_vertex_buffer's */
   1584    arg_types[i++] = int32_type;                          /* instance_id */
   1585    arg_types[i++] = int32_type;                          /* vertex_id_offset */
   1586    arg_types[i++] = int32_type;                          /* start_instance */
   1587    arg_types[i++] = LLVMPointerType(int32_type, 0);      /* fetch_elts  */
   1588 
   1589    func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
   1590                                 arg_types, num_arg_types, 0);
   1591 
   1592    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
   1593    variant->function = variant_func;
   1594 
   1595    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
   1596    for (i = 0; i < num_arg_types; ++i)
   1597       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
   1598          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
   1599 
   1600    context_ptr               = LLVMGetParam(variant_func, 0);
   1601    io_ptr                    = LLVMGetParam(variant_func, 1);
   1602    vbuffers_ptr              = LLVMGetParam(variant_func, 2);
   1603    count                     = LLVMGetParam(variant_func, 3);
   1604    /*
   1605     * XXX: the maxelt part is unused. Not really useful, since we cannot
   1606     * get index buffer overflows due to vsplit (which provides its own
   1607     * elts buffer, with a different size than what's passed in here).
   1608     */
   1609    start_or_maxelt           = LLVMGetParam(variant_func, 4);
   1610    /*
   1611     * XXX: stride is actually unused. The stride we use is strictly calculated
   1612     * from the number of outputs (including the draw_extra outputs).
   1613     * Should probably fix some day (we need a new vs just because of extra
   1614     * outputs which the generated vs won't touch).
   1615     */
   1616    stride                    = LLVMGetParam(variant_func, 5);
   1617    vb_ptr                    = LLVMGetParam(variant_func, 6);
   1618    system_values.instance_id = LLVMGetParam(variant_func, 7);
   1619    vertex_id_offset          = LLVMGetParam(variant_func, 8);
   1620    start_instance            = LLVMGetParam(variant_func, 9);
   1621    fetch_elts                = LLVMGetParam(variant_func, 10);
   1622 
   1623    lp_build_name(context_ptr, "context");
   1624    lp_build_name(io_ptr, "io");
   1625    lp_build_name(vbuffers_ptr, "vbuffers");
   1626    lp_build_name(count, "count");
   1627    lp_build_name(start_or_maxelt, "start_or_maxelt");
   1628    lp_build_name(stride, "stride");
   1629    lp_build_name(vb_ptr, "vb");
   1630    lp_build_name(system_values.instance_id, "instance_id");
   1631    lp_build_name(vertex_id_offset, "vertex_id_offset");
   1632    lp_build_name(start_instance, "start_instance");
   1633    lp_build_name(fetch_elts, "fetch_elts");
   1634 
   1635    /*
   1636     * Function body
   1637     */
   1638 
   1639    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
   1640    builder = gallivm->builder;
   1641    LLVMPositionBuilderAtEnd(builder, block);
   1642 
   1643    memset(&vs_type, 0, sizeof vs_type);
   1644    vs_type.floating = TRUE; /* floating point values */
   1645    vs_type.sign = TRUE;     /* values are signed */
   1646    vs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
   1647    vs_type.width = 32;      /* 32-bit float */
   1648    vs_type.length = vector_length;
   1649 
   1650    lp_build_context_init(&bld, gallivm, lp_type_uint(32));
   1651    lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
   1652 
   1653    /* hold temporary "bool" clipmask */
   1654    clipmask_bool_ptr = lp_build_alloca(gallivm, blduivec.vec_type, "");
   1655 
   1656    fake_buf = lp_build_alloca_undef(gallivm,
   1657                  LLVMVectorType(LLVMInt64TypeInContext(context), 4), "");
   1658    fake_buf = LLVMBuildBitCast(builder, fake_buf,
   1659                  LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
   1660    fake_buf_ptr = LLVMBuildGEP(builder, fake_buf, &bld.zero, 1, "");
   1661 
   1662    /* code generated texture sampling */
   1663    sampler = draw_llvm_sampler_soa_create(draw_llvm_variant_key_samplers(key));
   1664 
   1665    step = lp_build_const_int32(gallivm, vector_length);
   1666 
   1667    ind_vec = blduivec.undef;
   1668    for (i = 0; i < vs_type.length; i++) {
   1669       LLVMValueRef index = lp_build_const_int32(gallivm, i);
   1670       ind_vec = LLVMBuildInsertElement(builder, ind_vec, index, index, "");
   1671    }
   1672 
   1673    fetch_max = lp_build_alloca(gallivm, int32_type, "fetch_max");
   1674 
   1675    have_elts = LLVMBuildICmp(builder, LLVMIntNE,
   1676                              LLVMConstPointerNull(arg_types[10]), fetch_elts, "");
   1677 
   1678    fetch_max = LLVMBuildSub(builder, count, bld.one, "fetch_max");
   1679    fetch_max = lp_build_broadcast_scalar(&blduivec, fetch_max);
   1680    /*
   1681     * Only needed for non-indexed path.
   1682     */
   1683    start_vec = lp_build_broadcast_scalar(&blduivec, start_or_maxelt);
   1684 
   1685    /*
   1686     * Pre-calculate everything which is constant per shader invocation.
   1687     */
   1688    for (j = 0; j < key->nr_vertex_elements; ++j) {
   1689       LLVMValueRef vb_buffer_offset, buffer_size, temp_ptr;
   1690       LLVMValueRef vb_info, vbuffer_ptr, buf_offset, ofbit;
   1691       struct pipe_vertex_element *velem = &key->vertex_element[j];
   1692       LLVMValueRef vb_index =
   1693          lp_build_const_int32(gallivm, velem->vertex_buffer_index);
   1694       LLVMValueRef bsize = lp_build_const_int32(gallivm,
   1695                                                 util_format_get_blocksize(velem->src_format));
   1696       LLVMValueRef src_offset = lp_build_const_int32(gallivm,
   1697                                                      velem->src_offset);
   1698       struct lp_build_if_state if_ctx;
   1699 
   1700       if (velem->src_format != PIPE_FORMAT_NONE) {
   1701          vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &vb_index, 1, "");
   1702          vb_info = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, "");
   1703          vb_stride[j] = draw_jit_vbuffer_stride(gallivm, vb_info);
   1704          vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vb_info);
   1705          map_ptr[j] = draw_jit_dvbuffer_map(gallivm, vbuffer_ptr);
   1706          buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr);
   1707 
   1708          ofbit = NULL;
   1709          /*
   1710           * We'll set buffer_size_adj to zero if we have of, so it will
   1711           * always overflow later automatically without having to keep ofbit.
   1712           * Overflows (with normal wraparound) doing the actual offset
   1713           * calculation should be ok, just not for the buffer size calc.
   1714           * It would also be possible to detect such overflows and return
   1715           * zeros if that happens, but this would be more complex.
   1716           */
   1717          buf_offset = lp_build_add(&bld, vb_buffer_offset, src_offset);
   1718          tmp = lp_build_sub(&bld, bsize, bld.one);
   1719          buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, tmp,
   1720                                                      &ofbit);
   1721          buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size_adj[j],
   1722                                                      buf_offset, &ofbit);
   1723 
   1724          /*
   1725           * We can't easily set fake vertex buffers outside the generated code.
   1726           * Hence, set fake vertex buffers here instead basically, so fetch
   1727           * code can always fetch using offset 0, eliminating all control flow
   1728           * inside the main loop.
   1729           * (Alternatively, could have control flow per vector skipping fetch
   1730           * if ofbit is true.)
   1731           */
   1732          if (velem->instance_divisor) {
   1733             /*
   1734              * Index is equal to the start instance plus the number of current
   1735              * instance divided by the divisor. In this case we compute it as:
   1736              * index = start_instance + (instance_id  / divisor).
   1737              * Note we could actually do the fetch here, outside the loop -
   1738              * it's all constant, hopefully llvm recognizes this.
   1739              */
   1740             LLVMValueRef current_instance;
   1741             current_instance = LLVMBuildUDiv(builder, system_values.instance_id,
   1742                                              lp_build_const_int32(gallivm,
   1743                                                                   velem->instance_divisor),
   1744                                              "instance_divisor");
   1745             instance_index[j] = lp_build_uadd_overflow(gallivm, start_instance,
   1746                                                        current_instance, &ofbit);
   1747          }
   1748 
   1749          buffer_size_adj[j] = LLVMBuildSelect(builder, ofbit, bld.zero,
   1750                                               buffer_size_adj[j], "");
   1751 
   1752          temp_ptr = lp_build_alloca_undef(gallivm,
   1753                        LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
   1754 
   1755          lp_build_if(&if_ctx, gallivm, ofbit);
   1756          {
   1757             LLVMBuildStore(builder, fake_buf_ptr, temp_ptr);
   1758          }
   1759          lp_build_else(&if_ctx);
   1760          {
   1761             map_ptr[j] = LLVMBuildGEP(builder, map_ptr[j], &buf_offset, 1, "");
   1762             LLVMBuildStore(builder, map_ptr[j], temp_ptr);
   1763          }
   1764          lp_build_endif(&if_ctx);
   1765          map_ptr[j] = LLVMBuildLoad(builder, temp_ptr, "map_ptr");
   1766 
   1767          if (0) {
   1768             lp_build_printf(gallivm, "velem %d, vbuf index = %u, vb_stride = %u\n",
   1769                             lp_build_const_int32(gallivm, j),
   1770                             vb_index, vb_stride[j]);
   1771             lp_build_printf(gallivm,
   1772                             "   vb_buffer_offset = %u, src_offset = %u, buf_offset = %u\n",
   1773                             vb_buffer_offset, src_offset, buf_offset);
   1774             lp_build_printf(gallivm, "   buffer size = %u, blocksize = %u\n",
   1775                             buffer_size, bsize);
   1776             lp_build_printf(gallivm, "   instance_id = %u\n", system_values.instance_id);
   1777          }
   1778       }
   1779    }
   1780 
   1781    lp_build_loop_begin(&lp_loop, gallivm, bld.zero);
   1782    {
   1783       LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
   1784       LLVMValueRef io;
   1785       LLVMValueRef clipmask;   /* holds the clipmask value */
   1786       LLVMValueRef true_index_array, index_store;
   1787       const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
   1788 
   1789       io_itr = lp_loop.counter;
   1790 
   1791       io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
   1792 #if DEBUG_STORE
   1793       lp_build_printf(gallivm, " --- io %d = %p, loop counter %d\n",
   1794                       io_itr, io, lp_loop.counter);
   1795 #endif
   1796 
   1797       true_index_array = lp_build_broadcast_scalar(&blduivec, lp_loop.counter);
   1798       true_index_array = LLVMBuildAdd(builder, true_index_array, ind_vec, "");
   1799 
   1800       /*
   1801        * Limit indices to fetch_max, otherwise might try to access indices
   1802        * beyond index buffer (or rather vsplit elt buffer) size.
   1803        * Could probably safely (?) skip this for non-indexed draws and
   1804        * simplify things minimally (by removing it could combine the ind_vec
   1805        * and start_vec adds). I think the only effect for non-indexed draws will
   1806        * be that for the invalid elements they will be all fetched from the
   1807        * same location as the last valid one, but noone should really care.
   1808        */
   1809       true_index_array = lp_build_min(&blduivec, true_index_array, fetch_max);
   1810 
   1811       index_store = lp_build_alloca_undef(gallivm, blduivec.vec_type, "index_store");
   1812 
   1813       lp_build_if(&if_ctx, gallivm, have_elts);
   1814       {
   1815          /*
   1816           * Note: you'd expect some comparison/clamp against fetch_elt_max
   1817           * here.
   1818           * There used to be one here but it was incorrect: overflow was
   1819           * detected if index > fetch_elt_max - but the correct condition
   1820           * would be index >= fetch_elt_max (since this is just size of elts
   1821           * buffer / element size).
   1822           * Using the correct condition however will cause failures - due to
   1823           * vsplit/vcache code which rebases indices. So, as an example, if
   1824           * fetch_elt_max is just 1 and fetch_count 2, vsplit cache will
   1825           * replace all invalid indices with 0 - which in case of elt_bias
   1826           * not being zero will get a different fetch index than the valid
   1827           * index 0. So, just rely on vsplit code preventing out-of-bounds
   1828           * fetches. This is also why it's safe to do elts fetch even if there
   1829           * was no index buffer bound - the real buffer is never seen here, at
   1830           * least not if there are index buffer overflows...
   1831           */
   1832 
   1833          /*
   1834           * XXX should not have to do this, as scale can be handled
   1835           * natively by loads (hits asserts though).
   1836           */
   1837          tmp = lp_build_shl_imm(&blduivec, true_index_array, 2);
   1838          fetch_elts = LLVMBuildBitCast(builder, fetch_elts,
   1839                                        LLVMPointerType(LLVMInt8TypeInContext(context),
   1840                                                        0), "");
   1841          tmp = lp_build_gather(gallivm, vs_type.length,
   1842                                32, bld.type, TRUE,
   1843                                fetch_elts, tmp, FALSE);
   1844          LLVMBuildStore(builder, tmp, index_store);
   1845       }
   1846       lp_build_else(&if_ctx);
   1847       {
   1848          tmp = LLVMBuildAdd(builder, true_index_array, start_vec, "");
   1849          LLVMBuildStore(builder, tmp, index_store);
   1850       }
   1851       lp_build_endif(&if_ctx);
   1852 
   1853       true_index_array = LLVMBuildLoad(builder, index_store, "");
   1854 
   1855       for (j = 0; j < key->nr_vertex_elements; ++j) {
   1856          struct pipe_vertex_element *velem = &key->vertex_element[j];
   1857          const struct util_format_description *format_desc =
   1858             util_format_description(velem->src_format);
   1859 
   1860          if (format_desc->format == PIPE_FORMAT_NONE) {
   1861             for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
   1862                inputs[j][i] = lp_build_zero(gallivm, vs_type);
   1863             }
   1864          }
   1865          else if (velem->instance_divisor) {
   1866             fetch_instanced(gallivm, format_desc, vs_type,
   1867                             vb_stride[j], map_ptr[j],
   1868                             buffer_size_adj[j],
   1869                             inputs[j], instance_index[j]);
   1870          }
   1871          else {
   1872             fetch_vector(gallivm, format_desc, vs_type,
   1873                          vb_stride[j], map_ptr[j],
   1874                          buffer_size_adj[j],
   1875                          inputs[j], true_index_array);
   1876          }
   1877       }
   1878 
   1879       /* In the paths with elts vertex id has to be unaffected by the
   1880        * index bias and because indices inside our elements array have
   1881        * already had index bias applied we need to subtract it here to
   1882        * get back to the original index.
   1883        * in the linear paths vertex id has to be unaffected by the
   1884        * original start index and because we abuse the 'start' variable
   1885        * to either represent the actual start index or the index at which
   1886        * the primitive was split (we split rendering into chunks of at
   1887        * most 4095-vertices) we need to back out the original start
   1888        * index out of our vertex id here.
   1889        */
   1890       system_values.basevertex = lp_build_broadcast_scalar(&blduivec,
   1891                                                            vertex_id_offset);
   1892       system_values.vertex_id = true_index_array;
   1893       system_values.vertex_id_nobase = LLVMBuildSub(builder, true_index_array,
   1894                                                       system_values.basevertex, "");
   1895 
   1896       ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs;
   1897       generate_vs(variant,
   1898                   builder,
   1899                   vs_type,
   1900                   outputs,
   1901                   ptr_aos,
   1902                   &system_values,
   1903                   context_ptr,
   1904                   sampler,
   1905                   key->clamp_vertex_color);
   1906 
   1907       if (pos != -1 && cv != -1) {
   1908          /* store original positions in clip before further manipulation */
   1909          store_clip(gallivm, vs_type, io, outputs, pos);
   1910 
   1911          /* do cliptest */
   1912          if (enable_cliptest) {
   1913             LLVMValueRef temp = LLVMBuildLoad(builder, clipmask_bool_ptr, "");
   1914             /* allocate clipmask, assign it integer type */
   1915             clipmask = generate_clipmask(llvm,
   1916                                          gallivm,
   1917                                          vs_type,
   1918                                          outputs,
   1919                                          key,
   1920                                          context_ptr, &have_clipdist);
   1921             temp = LLVMBuildOr(builder, clipmask, temp, "");
   1922             /* store temporary clipping boolean value */
   1923             LLVMBuildStore(builder, temp, clipmask_bool_ptr);
   1924          }
   1925          else {
   1926             clipmask = blduivec.zero;
   1927          }
   1928 
   1929          /* do viewport mapping */
   1930          if (!bypass_viewport) {
   1931             generate_viewport(variant, builder, vs_type, outputs, context_ptr);
   1932          }
   1933       }
   1934       else {
   1935          clipmask = blduivec.zero;
   1936       }
   1937 
   1938       /* store clipmask in vertex header,
   1939        * original positions in clip
   1940        * and transformed positions in data
   1941        */
   1942       convert_to_aos(gallivm, io, NULL, outputs, clipmask,
   1943                      vs_info->num_outputs, vs_type,
   1944                      enable_cliptest && key->need_edgeflags);
   1945    }
   1946    lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);
   1947 
   1948    sampler->destroy(sampler);
   1949 
   1950    /* return clipping boolean value for function */
   1951    ret = clipmask_booli8(gallivm, vs_type, clipmask_bool_ptr,
   1952                          enable_cliptest && key->need_edgeflags);
   1953 
   1954    LLVMBuildRet(builder, ret);
   1955 
   1956    gallivm_verify_function(gallivm, variant_func);
   1957 }
   1958 
   1959 
   1960 struct draw_llvm_variant_key *
   1961 draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
   1962 {
   1963    unsigned i;
   1964    struct draw_llvm_variant_key *key;
   1965    struct draw_sampler_static_state *draw_sampler;
   1966 
   1967    key = (struct draw_llvm_variant_key *)store;
   1968 
   1969    memset(key, 0, offsetof(struct draw_llvm_variant_key, vertex_element[0]));
   1970 
   1971    key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color; /**/
   1972 
   1973    /* will have to rig this up properly later */
   1974    key->clip_xy = llvm->draw->clip_xy;
   1975    key->clip_z = llvm->draw->clip_z;
   1976    key->clip_user = llvm->draw->clip_user;
   1977    key->bypass_viewport = llvm->draw->bypass_viewport;
   1978    key->clip_halfz = llvm->draw->rasterizer->clip_halfz;
   1979    /* XXX assumes edgeflag output not at 0 */
   1980    key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
   1981    key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
   1982    key->has_gs = llvm->draw->gs.geometry_shader != NULL;
   1983    key->num_outputs = draw_total_vs_outputs(llvm->draw);
   1984 
   1985    /* All variants of this shader will have the same value for
   1986     * nr_samplers.  Not yet trying to compact away holes in the
   1987     * sampler array.
   1988     */
   1989    key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
   1990    if (llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
   1991       key->nr_sampler_views =
   1992          llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
   1993    }
   1994    else {
   1995       key->nr_sampler_views = key->nr_samplers;
   1996    }
   1997 
   1998    /* Presumably all variants of the shader should have the same
   1999     * number of vertex elements - ie the number of shader inputs.
   2000     * NOTE: we NEED to store the needed number of needed inputs
   2001     * here, not the number of provided elements to match keysize
   2002     * (and the offset of sampler state in the key).
   2003     * If we have excess number of vertex elements, this is valid,
   2004     * but the excess ones don't matter.
   2005     * If we don't have enough vertex elements (which looks not really
   2006     * valid but we'll handle it gracefully) fill out missing ones with
   2007     * zero (we'll recognize these later by PIPE_FORMAT_NONE).
   2008     */
   2009    key->nr_vertex_elements =
   2010       llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 1;
   2011 
   2012    if (llvm->draw->pt.nr_vertex_elements < key->nr_vertex_elements) {
   2013       debug_printf("draw: vs with %d inputs but only have %d vertex elements\n",
   2014                    key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements);
   2015       memset(key->vertex_element, 0,
   2016              sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
   2017    }
   2018    memcpy(key->vertex_element,
   2019           llvm->draw->pt.vertex_element,
   2020           sizeof(struct pipe_vertex_element) *
   2021              MIN2(key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements));
   2022 
   2023    draw_sampler = draw_llvm_variant_key_samplers(key);
   2024    memset(draw_sampler, 0,
   2025           MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
   2026 
   2027    for (i = 0 ; i < key->nr_samplers; i++) {
   2028       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
   2029                                       llvm->draw->samplers[PIPE_SHADER_VERTEX][i]);
   2030    }
   2031    for (i = 0 ; i < key->nr_sampler_views; i++) {
   2032       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
   2033                                       llvm->draw->sampler_views[PIPE_SHADER_VERTEX][i]);
   2034    }
   2035 
   2036    return key;
   2037 }
   2038 
   2039 
   2040 void
   2041 draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
   2042 {
   2043    unsigned i;
   2044    struct draw_sampler_static_state *sampler = draw_llvm_variant_key_samplers(key);
   2045 
   2046    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
   2047    debug_printf("clip_xy = %u\n", key->clip_xy);
   2048    debug_printf("clip_z = %u\n", key->clip_z);
   2049    debug_printf("clip_user = %u\n", key->clip_user);
   2050    debug_printf("bypass_viewport = %u\n", key->bypass_viewport);
   2051    debug_printf("clip_halfz = %u\n", key->clip_halfz);
   2052    debug_printf("need_edgeflags = %u\n", key->need_edgeflags);
   2053    debug_printf("has_gs = %u\n", key->has_gs);
   2054    debug_printf("ucp_enable = %u\n", key->ucp_enable);
   2055 
   2056    for (i = 0 ; i < key->nr_vertex_elements; i++) {
   2057       debug_printf("vertex_element[%i].src_offset = %u\n", i, key->vertex_element[i].src_offset);
   2058       debug_printf("vertex_element[%i].instance_divisor = %u\n", i, key->vertex_element[i].instance_divisor);
   2059       debug_printf("vertex_element[%i].vertex_buffer_index = %u\n", i, key->vertex_element[i].vertex_buffer_index);
   2060       debug_printf("vertex_element[%i].src_format = %s\n", i, util_format_name(key->vertex_element[i].src_format));
   2061    }
   2062 
   2063    for (i = 0 ; i < key->nr_sampler_views; i++) {
   2064       debug_printf("sampler[%i].src_format = %s\n", i, util_format_name(sampler[i].texture_state.format));
   2065    }
   2066 }
   2067 
   2068 
   2069 void
   2070 draw_llvm_set_mapped_texture(struct draw_context *draw,
   2071                              unsigned shader_stage,
   2072                              unsigned sview_idx,
   2073                              uint32_t width, uint32_t height, uint32_t depth,
   2074                              uint32_t first_level, uint32_t last_level,
   2075                              const void *base_ptr,
   2076                              uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
   2077                              uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
   2078                              uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])
   2079 {
   2080    unsigned j;
   2081    struct draw_jit_texture *jit_tex;
   2082 
   2083    assert(shader_stage == PIPE_SHADER_VERTEX ||
   2084           shader_stage == PIPE_SHADER_GEOMETRY);
   2085 
   2086    if (shader_stage == PIPE_SHADER_VERTEX) {
   2087       assert(sview_idx < ARRAY_SIZE(draw->llvm->jit_context.textures));
   2088 
   2089       jit_tex = &draw->llvm->jit_context.textures[sview_idx];
   2090    } else if (shader_stage == PIPE_SHADER_GEOMETRY) {
   2091       assert(sview_idx < ARRAY_SIZE(draw->llvm->gs_jit_context.textures));
   2092 
   2093       jit_tex = &draw->llvm->gs_jit_context.textures[sview_idx];
   2094    } else {
   2095       assert(0);
   2096       return;
   2097    }
   2098 
   2099    jit_tex->width = width;
   2100    jit_tex->height = height;
   2101    jit_tex->depth = depth;
   2102    jit_tex->first_level = first_level;
   2103    jit_tex->last_level = last_level;
   2104    jit_tex->base = base_ptr;
   2105 
   2106    for (j = first_level; j <= last_level; j++) {
   2107       jit_tex->mip_offsets[j] = mip_offsets[j];
   2108       jit_tex->row_stride[j] = row_stride[j];
   2109       jit_tex->img_stride[j] = img_stride[j];
   2110    }
   2111 }
   2112 
   2113 
   2114 void
   2115 draw_llvm_set_sampler_state(struct draw_context *draw,
   2116                             unsigned shader_type)
   2117 {
   2118    unsigned i;
   2119 
   2120    if (shader_type == PIPE_SHADER_VERTEX) {
   2121       for (i = 0; i < draw->num_samplers[PIPE_SHADER_VERTEX]; i++) {
   2122          struct draw_jit_sampler *jit_sam = &draw->llvm->jit_context.samplers[i];
   2123 
   2124          if (draw->samplers[PIPE_SHADER_VERTEX][i]) {
   2125             const struct pipe_sampler_state *s
   2126                = draw->samplers[PIPE_SHADER_VERTEX][i];
   2127             jit_sam->min_lod = s->min_lod;
   2128             jit_sam->max_lod = s->max_lod;
   2129             jit_sam->lod_bias = s->lod_bias;
   2130             COPY_4V(jit_sam->border_color, s->border_color.f);
   2131          }
   2132       }
   2133    } else if (shader_type == PIPE_SHADER_GEOMETRY) {
   2134       for (i = 0; i < draw->num_samplers[PIPE_SHADER_GEOMETRY]; i++) {
   2135          struct draw_jit_sampler *jit_sam = &draw->llvm->gs_jit_context.samplers[i];
   2136 
   2137          if (draw->samplers[PIPE_SHADER_GEOMETRY][i]) {
   2138             const struct pipe_sampler_state *s
   2139                = draw->samplers[PIPE_SHADER_GEOMETRY][i];
   2140             jit_sam->min_lod = s->min_lod;
   2141             jit_sam->max_lod = s->max_lod;
   2142             jit_sam->lod_bias = s->lod_bias;
   2143             COPY_4V(jit_sam->border_color, s->border_color.f);
   2144          }
   2145       }
   2146    }
   2147 }
   2148 
   2149 
   2150 void
   2151 draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
   2152 {
   2153    struct draw_llvm *llvm = variant->llvm;
   2154 
   2155    gallivm_destroy(variant->gallivm);
   2156 
   2157    remove_from_list(&variant->list_item_local);
   2158    variant->shader->variants_cached--;
   2159    remove_from_list(&variant->list_item_global);
   2160    llvm->nr_variants--;
   2161    FREE(variant);
   2162 }
   2163 
   2164 
   2165 /**
   2166  * Create LLVM types for various structures.
   2167  */
   2168 static void
   2169 create_gs_jit_types(struct draw_gs_llvm_variant *var)
   2170 {
   2171    struct gallivm_state *gallivm = var->gallivm;
   2172    LLVMTypeRef texture_type, sampler_type, context_type;
   2173 
   2174    texture_type = create_jit_texture_type(gallivm, "texture");
   2175    sampler_type = create_jit_sampler_type(gallivm, "sampler");
   2176 
   2177    context_type = create_gs_jit_context_type(gallivm,
   2178                                              var->shader->base.vector_length,
   2179                                              texture_type, sampler_type,
   2180                                              "draw_gs_jit_context");
   2181    var->context_ptr_type = LLVMPointerType(context_type, 0);
   2182 
   2183    var->input_array_type = create_gs_jit_input_type(gallivm);
   2184 }
   2185 
   2186 static LLVMTypeRef
   2187 get_gs_context_ptr_type(struct draw_gs_llvm_variant *variant)
   2188 {
   2189    if (!variant->context_ptr_type)
   2190       create_gs_jit_types(variant);
   2191    return variant->context_ptr_type;
   2192 }
   2193 
   2194 static LLVMValueRef
   2195 generate_mask_value(struct draw_gs_llvm_variant *variant,
   2196                     struct lp_type gs_type)
   2197 {
   2198    struct gallivm_state *gallivm = variant->gallivm;
   2199    LLVMBuilderRef builder = gallivm->builder;
   2200    struct lp_type mask_type = lp_int_type(gs_type);
   2201    LLVMValueRef num_prims;
   2202    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
   2203    unsigned i;
   2204 
   2205    num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type),
   2206                                   variant->num_prims);
   2207    for (i = 0; i < gs_type.length; i++) {
   2208       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
   2209       mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");
   2210    }
   2211    mask_val = lp_build_compare(gallivm, mask_type,
   2212                                PIPE_FUNC_GREATER, num_prims, mask_val);
   2213 
   2214    return mask_val;
   2215 }
   2216 
   2217 static void
   2218 draw_gs_llvm_generate(struct draw_llvm *llvm,
   2219                       struct draw_gs_llvm_variant *variant)
   2220 {
   2221    struct gallivm_state *gallivm = variant->gallivm;
   2222    LLVMContextRef context = gallivm->context;
   2223    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
   2224    LLVMTypeRef arg_types[7];
   2225    LLVMTypeRef func_type;
   2226    LLVMValueRef variant_func;
   2227    LLVMValueRef context_ptr;
   2228    LLVMValueRef prim_id_ptr;
   2229    LLVMBasicBlockRef block;
   2230    LLVMBuilderRef builder;
   2231    LLVMValueRef io_ptr, input_array, num_prims, mask_val;
   2232    struct lp_build_sampler_soa *sampler = 0;
   2233    struct lp_build_context bld;
   2234    struct lp_bld_tgsi_system_values system_values;
   2235    char func_name[64];
   2236    struct lp_type gs_type;
   2237    unsigned i;
   2238    struct draw_gs_llvm_iface gs_iface;
   2239    const struct tgsi_token *tokens = variant->shader->base.state.tokens;
   2240    LLVMValueRef consts_ptr, num_consts_ptr;
   2241    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
   2242    struct lp_build_mask_context mask;
   2243    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
   2244    unsigned vector_length = variant->shader->base.vector_length;
   2245 
   2246    memset(&system_values, 0, sizeof(system_values));
   2247 
   2248    util_snprintf(func_name, sizeof(func_name), "draw_llvm_gs_variant%u",
   2249                  variant->shader->variants_cached);
   2250 
   2251    assert(variant->vertex_header_ptr_type);
   2252 
   2253    arg_types[0] = get_gs_context_ptr_type(variant);    /* context */
   2254    arg_types[1] = variant->input_array_type;           /* input */
   2255    arg_types[2] = variant->vertex_header_ptr_type;     /* vertex_header */
   2256    arg_types[3] = int32_type;                          /* num_prims */
   2257    arg_types[4] = int32_type;                          /* instance_id */
   2258    arg_types[5] = LLVMPointerType(
   2259       LLVMVectorType(int32_type, vector_length), 0);   /* prim_id_ptr */
   2260    arg_types[6] = int32_type;
   2261 
   2262    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
   2263 
   2264    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
   2265 
   2266    variant->function = variant_func;
   2267 
   2268    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
   2269 
   2270    for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
   2271       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
   2272          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
   2273 
   2274    context_ptr               = LLVMGetParam(variant_func, 0);
   2275    input_array               = LLVMGetParam(variant_func, 1);
   2276    io_ptr                    = LLVMGetParam(variant_func, 2);
   2277    num_prims                 = LLVMGetParam(variant_func, 3);
   2278    system_values.instance_id = LLVMGetParam(variant_func, 4);
   2279    prim_id_ptr               = LLVMGetParam(variant_func, 5);
   2280    system_values.invocation_id = LLVMGetParam(variant_func, 6);
   2281 
   2282    lp_build_name(context_ptr, "context");
   2283    lp_build_name(input_array, "input");
   2284    lp_build_name(io_ptr, "io");
   2285    lp_build_name(num_prims, "num_prims");
   2286    lp_build_name(system_values.instance_id, "instance_id");
   2287    lp_build_name(prim_id_ptr, "prim_id_ptr");
   2288    lp_build_name(system_values.invocation_id, "invocation_id");
   2289 
   2290    variant->context_ptr = context_ptr;
   2291    variant->io_ptr = io_ptr;
   2292    variant->num_prims = num_prims;
   2293 
   2294    gs_iface.base.fetch_input = draw_gs_llvm_fetch_input;
   2295    gs_iface.base.emit_vertex = draw_gs_llvm_emit_vertex;
   2296    gs_iface.base.end_primitive = draw_gs_llvm_end_primitive;
   2297    gs_iface.base.gs_epilogue = draw_gs_llvm_epilogue;
   2298    gs_iface.input = input_array;
   2299    gs_iface.variant = variant;
   2300 
   2301    /*
   2302     * Function body
   2303     */
   2304 
   2305    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
   2306    builder = gallivm->builder;
   2307    LLVMPositionBuilderAtEnd(builder, block);
   2308 
   2309    lp_build_context_init(&bld, gallivm, lp_type_int(32));
   2310 
   2311    memset(&gs_type, 0, sizeof gs_type);
   2312    gs_type.floating = TRUE; /* floating point values */
   2313    gs_type.sign = TRUE;     /* values are signed */
   2314    gs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
   2315    gs_type.width = 32;      /* 32-bit float */
   2316    gs_type.length = vector_length;
   2317 
   2318    consts_ptr = draw_gs_jit_context_constants(variant->gallivm, context_ptr);
   2319    num_consts_ptr =
   2320       draw_gs_jit_context_num_constants(variant->gallivm, context_ptr);
   2321 
   2322    /* code generated texture sampling */
   2323    sampler = draw_llvm_sampler_soa_create(variant->key.samplers);
   2324 
   2325    mask_val = generate_mask_value(variant, gs_type);
   2326    lp_build_mask_begin(&mask, gallivm, gs_type, mask_val);
   2327 
   2328    if (gs_info->uses_primid) {
   2329       system_values.prim_id = LLVMBuildLoad(builder, prim_id_ptr, "prim_id");
   2330    }
   2331 
   2332    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
   2333       tgsi_dump(tokens, 0);
   2334       draw_gs_llvm_dump_variant_key(&variant->key);
   2335    }
   2336 
   2337    lp_build_tgsi_soa(variant->gallivm,
   2338                      tokens,
   2339                      gs_type,
   2340                      &mask,
   2341                      consts_ptr,
   2342                      num_consts_ptr,
   2343                      &system_values,
   2344                      NULL,
   2345                      outputs,
   2346                      context_ptr,
   2347                      NULL,
   2348                      sampler,
   2349                      &llvm->draw->gs.geometry_shader->info,
   2350                      (const struct lp_build_tgsi_gs_iface *)&gs_iface);
   2351 
   2352    sampler->destroy(sampler);
   2353 
   2354    lp_build_mask_end(&mask);
   2355 
   2356    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
   2357 
   2358    gallivm_verify_function(gallivm, variant_func);
   2359 }
   2360 
   2361 
   2362 struct draw_gs_llvm_variant *
   2363 draw_gs_llvm_create_variant(struct draw_llvm *llvm,
   2364                             unsigned num_outputs,
   2365                             const struct draw_gs_llvm_variant_key *key)
   2366 {
   2367    struct draw_gs_llvm_variant *variant;
   2368    struct llvm_geometry_shader *shader =
   2369       llvm_geometry_shader(llvm->draw->gs.geometry_shader);
   2370    LLVMTypeRef vertex_header;
   2371    char module_name[64];
   2372 
   2373    variant = MALLOC(sizeof *variant +
   2374                     shader->variant_key_size -
   2375                     sizeof variant->key);
   2376    if (!variant)
   2377       return NULL;
   2378 
   2379    variant->llvm = llvm;
   2380    variant->shader = shader;
   2381 
   2382    util_snprintf(module_name, sizeof(module_name), "draw_llvm_gs_variant%u",
   2383                  variant->shader->variants_cached);
   2384 
   2385    variant->gallivm = gallivm_create(module_name, llvm->context);
   2386 
   2387    create_gs_jit_types(variant);
   2388 
   2389    memcpy(&variant->key, key, shader->variant_key_size);
   2390 
   2391    vertex_header = create_jit_vertex_header(variant->gallivm, num_outputs);
   2392 
   2393    variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
   2394 
   2395    draw_gs_llvm_generate(llvm, variant);
   2396 
   2397    gallivm_compile_module(variant->gallivm);
   2398 
   2399    variant->jit_func = (draw_gs_jit_func)
   2400          gallivm_jit_function(variant->gallivm, variant->function);
   2401 
   2402    gallivm_free_ir(variant->gallivm);
   2403 
   2404    variant->list_item_global.base = variant;
   2405    variant->list_item_local.base = variant;
   2406    /*variant->no = */shader->variants_created++;
   2407    variant->list_item_global.base = variant;
   2408 
   2409    return variant;
   2410 }
   2411 
   2412 void
   2413 draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant)
   2414 {
   2415    struct draw_llvm *llvm = variant->llvm;
   2416 
   2417    gallivm_destroy(variant->gallivm);
   2418 
   2419    remove_from_list(&variant->list_item_local);
   2420    variant->shader->variants_cached--;
   2421    remove_from_list(&variant->list_item_global);
   2422    llvm->nr_gs_variants--;
   2423    FREE(variant);
   2424 }
   2425 
   2426 struct draw_gs_llvm_variant_key *
   2427 draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
   2428 {
   2429    unsigned i;
   2430    struct draw_gs_llvm_variant_key *key;
   2431    struct draw_sampler_static_state *draw_sampler;
   2432 
   2433    key = (struct draw_gs_llvm_variant_key *)store;
   2434 
   2435    memset(key, 0, offsetof(struct draw_gs_llvm_variant_key, samplers[0]));
   2436 
   2437    key->num_outputs = draw_total_gs_outputs(llvm->draw);
   2438 
   2439    /* All variants of this shader will have the same value for
   2440     * nr_samplers.  Not yet trying to compact away holes in the
   2441     * sampler array.
   2442     */
   2443    key->nr_samplers = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
   2444    if (llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
   2445       key->nr_sampler_views =
   2446          llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
   2447    }
   2448    else {
   2449       key->nr_sampler_views = key->nr_samplers;
   2450    }
   2451 
   2452    draw_sampler = key->samplers;
   2453 
   2454    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
   2455 
   2456    for (i = 0 ; i < key->nr_samplers; i++) {
   2457       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
   2458                                       llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]);
   2459    }
   2460    for (i = 0 ; i < key->nr_sampler_views; i++) {
   2461       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
   2462                                       llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]);
   2463    }
   2464 
   2465    return key;
   2466 }
   2467 
   2468 void
   2469 draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key)
   2470 {
   2471    unsigned i;
   2472    struct draw_sampler_static_state *sampler = key->samplers;
   2473 
   2474    for (i = 0 ; i < key->nr_sampler_views; i++) {
   2475       debug_printf("sampler[%i].src_format = %s\n", i,
   2476                    util_format_name(sampler[i].texture_state.format));
   2477    }
   2478 }
   2479