Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2009 VMware, Inc.
      4  * Copyright 2007-2008 VMware, Inc.
      5  * All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sub license, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * The above copyright notice and this permission notice (including the
     16  * next paragraph) shall be included in all copies or substantial portions
     17  * of the Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     26  *
     27  **************************************************************************/
     28 
     29 /**
     30  * @file
     31  * TGSI to LLVM IR translation -- SoA.
     32  *
     33  * @author Jose Fonseca <jfonseca (at) vmware.com>
     34  *
     35  * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
     36  * Brian Paul, and others.
     37  */
     38 
     39 #include "pipe/p_config.h"
     40 #include "pipe/p_shader_tokens.h"
     41 #include "util/u_debug.h"
     42 #include "util/u_math.h"
     43 #include "util/u_memory.h"
     44 #include "tgsi/tgsi_dump.h"
     45 #include "tgsi/tgsi_exec.h"
     46 #include "tgsi/tgsi_info.h"
     47 #include "tgsi/tgsi_parse.h"
     48 #include "tgsi/tgsi_util.h"
     49 #include "tgsi/tgsi_scan.h"
     50 #include "tgsi/tgsi_strings.h"
     51 #include "lp_bld_tgsi_action.h"
     52 #include "lp_bld_type.h"
     53 #include "lp_bld_const.h"
     54 #include "lp_bld_arit.h"
     55 #include "lp_bld_bitarit.h"
     56 #include "lp_bld_gather.h"
     57 #include "lp_bld_init.h"
     58 #include "lp_bld_logic.h"
     59 #include "lp_bld_swizzle.h"
     60 #include "lp_bld_flow.h"
     61 #include "lp_bld_quad.h"
     62 #include "lp_bld_tgsi.h"
     63 #include "lp_bld_limits.h"
     64 #include "lp_bld_debug.h"
     65 #include "lp_bld_printf.h"
     66 #include "lp_bld_sample.h"
     67 #include "lp_bld_struct.h"
     68 
     69 /* SM 4.0 says that subroutines can nest 32 deep and
     70  * we need one more for our main function */
     71 #define LP_MAX_NUM_FUNCS 33
     72 
     73 #define DUMP_GS_EMITS 0
     74 
     75 /*
     76  * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
     77  * instruction.
     78  *
     79  * TODO:
     80  * - take execution masks in consideration
     81  * - debug control-flow instructions
     82  */
     83 #define DEBUG_EXECUTION 0
     84 
     85 
     86 /*
     87  * Emit code to print a register value.
     88  */
     89 static void
     90 emit_dump_reg(struct gallivm_state *gallivm,
     91               unsigned file,
     92               unsigned index,
     93               unsigned chan,
     94               LLVMValueRef value)
     95 {
     96    char buf[32];
     97 
     98    util_snprintf(buf, sizeof buf, "    %s[%u].%c = ",
     99                  tgsi_file_name(file),
    100                  index, "xyzw"[chan]);
    101 
    102    lp_build_print_value(gallivm, buf, value);
    103 }
    104 
    105 /*
    106  * Return the context for the current function.
    107  * (always 'main', if shader doesn't do any function calls)
    108  */
    109 static inline struct function_ctx *
    110 func_ctx(struct lp_exec_mask *mask)
    111 {
    112    assert(mask->function_stack_size > 0);
    113    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
    114    return &mask->function_stack[mask->function_stack_size - 1];
    115 }
    116 
    117 /*
    118  * Returns true if we're in a loop.
    119  * It's global, meaning that it returns true even if there's
    120  * no loop inside the current function, but we were inside
    121  * a loop inside another function, from which this one was called.
    122  */
    123 static inline boolean
    124 mask_has_loop(struct lp_exec_mask *mask)
    125 {
    126    int i;
    127    for (i = mask->function_stack_size - 1; i >= 0; --i) {
    128       const struct function_ctx *ctx = &mask->function_stack[i];
    129       if (ctx->loop_stack_size > 0)
    130          return TRUE;
    131    }
    132    return FALSE;
    133 }
    134 
    135 /*
    136  * Returns true if we're inside a switch statement.
    137  * It's global, meaning that it returns true even if there's
    138  * no switch in the current function, but we were inside
    139  * a switch inside another function, from which this one was called.
    140  */
    141 static inline boolean
    142 mask_has_switch(struct lp_exec_mask *mask)
    143 {
    144    int i;
    145    for (i = mask->function_stack_size - 1; i >= 0; --i) {
    146       const struct function_ctx *ctx = &mask->function_stack[i];
    147       if (ctx->switch_stack_size > 0)
    148          return TRUE;
    149    }
    150    return FALSE;
    151 }
    152 
    153 /*
    154  * Returns true if we're inside a conditional.
    155  * It's global, meaning that it returns true even if there's
    156  * no conditional in the current function, but we were inside
    157  * a conditional inside another function, from which this one was called.
    158  */
    159 static inline boolean
    160 mask_has_cond(struct lp_exec_mask *mask)
    161 {
    162    int i;
    163    for (i = mask->function_stack_size - 1; i >= 0; --i) {
    164       const struct function_ctx *ctx = &mask->function_stack[i];
    165       if (ctx->cond_stack_size > 0)
    166          return TRUE;
    167    }
    168    return FALSE;
    169 }
    170 
    171 
/*
 * Initialize a function context at the specified index.
 *
 * Resets the per-function control-flow stacks and allocates the loop
 * limiter counter used to guarantee forward progress in shader loops.
 */
static void
lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
{
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx =  &mask->function_stack[function_idx];

   ctx->cond_stack_size = 0;
   ctx->loop_stack_size = 0;
   ctx->switch_stack_size = 0;

   /* main() has no caller to save a return mask for, so seed it from
    * the (all-ones) mask set up by lp_exec_mask_init(). */
   if (function_idx == 0) {
      ctx->ret_mask = mask->ret_mask;
   }

   /* Runtime counter decremented each loop iteration; loops terminate
    * once it reaches zero, capping iterations at
    * LP_MAX_TGSI_LOOP_ITERATIONS (see lp_exec_endloop()). */
   ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
                                       int_type, "looplimiter");
   LLVMBuildStore(
      builder,
      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
      ctx->loop_limiter);
}
    197 
/*
 * Initialize the execution-mask state: all partial masks start as
 * all-ones (every channel active), and the function stack is set up
 * with a single entry for main().
 */
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->ret_in_main = FALSE;
   /* For the main function */
   mask->function_stack_size = 1;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   /* All channels execute until some control-flow construct says otherwise. */
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
         mask->cond_mask = mask->switch_mask =
         LLVMConstAllOnes(mask->int_vec_type);

   /* Freed in lp_exec_mask_fini(). */
   mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
                                 sizeof(mask->function_stack[0]));
   lp_exec_mask_function_init(mask, 0);
}
    215 
/*
 * Release the function stack allocated by lp_exec_mask_init().
 */
static void
lp_exec_mask_fini(struct lp_exec_mask *mask)
{
   FREE(mask->function_stack);
}
    221 
/*
 * Recompute mask->exec_mask by AND'ing together whichever partial masks
 * (cond / loop / switch / return) are currently relevant, and update
 * mask->has_mask to tell stores whether masking must be honored at all.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   boolean has_loop_mask = mask_has_loop(mask);
   boolean has_cond_mask = mask_has_cond(mask);
   boolean has_switch_mask = mask_has_switch(mask);
   /* The return mask matters inside any subroutine, or in main() once
    * a RET has executed there (see lp_exec_mask_ret()). */
   boolean has_ret_mask = mask->function_stack_size > 1 ||
         mask->ret_in_main;

   if (has_loop_mask) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (has_switch_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->switch_mask,
                                     "switchmask");
   }

   if (has_ret_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (has_cond_mask ||
                     has_loop_mask ||
                     has_switch_mask ||
                     has_ret_mask);
}
    265 
/*
 * Enter an IF: push the current cond_mask and narrow it by AND'ing in
 * the condition value 'val'.
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   /* Past the nesting limit: keep counting (so matching pops stay
    * balanced) but don't touch the full stack. */
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
      ctx->cond_stack_size++;
      return;
   }
   /* At the outermost level of main() the cond mask must still be
    * pristine (all channels enabled). */
   if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}
    287 
/*
 * Handle ELSE: replace the current cond_mask with its complement,
 * still restricted to the mask that was active before the matching IF
 * (the value saved on top of the cond stack).
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(ctx->cond_stack_size);
   /* Overflowed nesting is ignored entirely (see cond_push). */
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
   if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}
    310 
/*
 * Handle ENDIF: pop the cond stack, restoring the mask that was active
 * before the matching IF.
 */
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   struct function_ctx *ctx = func_ctx(mask);
   assert(ctx->cond_stack_size);
   --ctx->cond_stack_size;
   /* Entries past the nesting limit were never pushed; only decrement. */
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
   lp_exec_mask_update(mask);
}
    321 
/*
 * Handle BGNLOOP: save the enclosing loop's state, emit the loop header
 * basic block, and set up the break mask storage for this loop.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   /* Past the nesting limit: keep counting so ENDLOOPs stay balanced,
    * but emit nothing. */
   if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
      ++ctx->loop_stack_size;
      return;
   }

   /* break_type is shared between loops and switches, so its save
    * stack is indexed by the combined nesting depth. */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   /* Save the enclosing loop's state so ENDLOOP can restore it. */
   ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
   ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
   ++ctx->loop_stack_size;

   /* The break mask must survive the loop's back edge, so it lives in
    * an alloca rather than an SSA value (see lp_exec_endloop()). */
   ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, ctx->loop_block);
   LLVMPositionBuilderAtEnd(builder, ctx->loop_block);

   mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");

   lp_exec_mask_update(mask);
}
    354 
/*
 * Handle BRK: disable the currently-active channels in either the loop
 * break mask or the switch mask, depending on what the break applies to.
 */
static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      /* Loop break: clear the executing channels from break_mask. */
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      /* Switch break: a break immediately followed by ENDSWITCH or CASE
       * is unconditional for all channels that reached it. */
      unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);


      if (ctx->switch_in_default) {
         /*
          * stop default execution but only if this is an unconditional switch.
          * (The condition here is not perfect since dead code after break is
          * allowed but should be sufficient since false negatives are just
          * unoptimized - so we don't have to pre-evaluate that).
          */
         if(break_always && ctx->switch_pc) {
            /* Jump back to where the deferred default was entered from
             * (switch_pc was re-purposed in lp_exec_endswitch()). */
            bld_base->pc = ctx->switch_pc;
            return;
         }
      }

      if (break_always) {
         /* No channel continues past this point in the switch. */
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         /* Conditional break: only disable the executing channels. */
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}
    404 
    405 static void lp_exec_continue(struct lp_exec_mask *mask)
    406 {
    407    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    408    LLVMValueRef exec_mask = LLVMBuildNot(builder,
    409                                          mask->exec_mask,
    410                                          "");
    411 
    412    mask->cont_mask = LLVMBuildAnd(builder,
    413                                   mask->cont_mask,
    414                                   exec_mask, "");
    415 
    416    lp_exec_mask_update(mask);
    417 }
    418 
    419 
/*
 * Handle ENDLOOP: emit the loop back edge (taken while any channel is
 * still active and the iteration limiter hasn't expired), then pop and
 * restore the enclosing loop's state.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   /* Integer wide enough to hold the whole exec mask vector, so the
    * "any channel active" test is a single compare against zero. */
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);


   assert(ctx->loop_stack_size);
   /* Loops past the nesting limit were never set up (see bgnloop);
    * just unwind the counter. */
   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      --ctx->loop_stack_size;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop
    */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, ctx->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   /* Back edge to the loop header while any channel remains active. */
   LLVMBuildCondBr(builder,
                   icond, ctx->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   /* Pop: restore the enclosing loop's masks, blocks and break type. */
   assert(ctx->loop_stack_size);
   --ctx->loop_stack_size;
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
         ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
    499 
/*
 * Handle SWITCH: save the enclosing switch's state and start a new one
 * with an all-zero switch mask (no channel executes until a matching
 * CASE or DEFAULT enables it).
 */
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   /* Past the nesting limit: keep counting so ENDSWITCHs stay
    * balanced, but emit nothing. */
   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size++;
      return;
   }

   /* break_type is shared between loops and switches, so its save
    * stack is indexed by the combined nesting depth. */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   /* Accumulates channels matched by any CASE; its complement is the
    * DEFAULT mask (see lp_exec_case() / lp_exec_default()). */
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}
    530 
/*
 * Handle ENDSWITCH: if a DEFAULT was deferred (it wasn't the last
 * statement and nothing fell through into it), jump back and execute it
 * now; otherwise pop and restore the enclosing switch's state.
 */
static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   /* Switches past the nesting limit were never set up; just unwind. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* check if there's deferred default if so do it now */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* Default executes the channels no CASE matched. */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      /* Rewind execution to just after the DEFAULT opcode. */
      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * re-purpose switch_pc to point to here again, since we stop execution of
       * the deferred default after next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }

   else if (ctx->switch_pc && ctx->switch_in_default) {
      /* Second pass: we just finished re-executing the deferred default. */
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   /* Pop: restore the enclosing switch's state and break type. */
   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
    582 
/*
 * Handle CASE: enable the channels whose switch value equals 'caseval',
 * keeping channels already enabled by earlier cases (fallthrough), and
 * record the match in switch_mask_default for later DEFAULT handling.
 */
static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   LLVMValueRef casemask, prevmask;

   /* Switches past the nesting limit emit nothing. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (!ctx->switch_in_default) {
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
      /* Remember which channels any case has matched. */
      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
                                             ctx->switch_mask_default, "sw_default_mask");
      /* OR with the running mask implements fallthrough from earlier cases. */
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}
    607 
/*
 * Analyse default statement in a switch.
 * \return true if default is last statement, false otherwise
 * \param default_pc_start contains pc of instruction to jump to
 *                         if default wasn't last but there's no
 *                         fallthrough into default.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned pc = bld_base->pc;
   struct function_ctx *ctx = func_ctx(mask);
   /* Track nesting so CASE/ENDSWITCH of inner switches are ignored. */
   int curr_switch_stack = ctx->switch_stack_size;

   /* Switches past the nesting limit were never set up; don't scan. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return false;
   }

   /* skip over case statements which are together with default */
   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
      pc++;
   }

   /* Scan forward for the next CASE or ENDSWITCH at our nesting level. */
   while (pc != ~0u && pc < bld_base->num_instructions) {
      unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
      switch (opcode) {
      case TGSI_OPCODE_CASE:
         if (curr_switch_stack == ctx->switch_stack_size) {
            /* pc - 1 so the interpreter loop's increment lands on the
             * CASE itself. */
            *default_pc_start = pc - 1;
            return false;
         }
         break;
      case TGSI_OPCODE_SWITCH:
         curr_switch_stack++;
         break;
      case TGSI_OPCODE_ENDSWITCH:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return true;
         }
         curr_switch_stack--;
         break;
      }
      pc++;
   }
   /* should never arrive here */
   assert(0);
   return true;
}
    658 
/*
 * Handle DEFAULT. If it is the last statement of the switch, simply
 * enable the channels no CASE matched; otherwise defer it (record the
 * PC and let lp_exec_endswitch() come back and execute it).
 */
static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   /* Switches past the nesting limit emit nothing. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not be always at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is last statement in switch (note that case statements appearing
    * "at the same time" as default don't change that) everything is just fine,
    * update switch mask and go on. This means we can handle default with
    * fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* Channels not matched by any case... */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      /* ...plus channels falling through from the preceding case. */
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, "case" immediately before default isn't really a
       * fallthrough, however we still have to count them as such as we
       * already have updated the masks.
       * If that happens in practice could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not last statement and there was no fallthrough into it,
       * we record the PC and continue execution at next case (again, those
       * case encountered at the same time don't count). At endswitch
       * time, we update switchmask, and go back executing the code we skipped
       * until the next break (possibly re-executing some code with changed mask
       * if there was a fallthrough out of default).
       * Finally, if it is not last statement and there was a fallthrough into it,
       * do the same as with the former case, except instead of skipping the code
       * just execute it without updating the mask, then go back and re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}
    723 
    724 
    725 /* stores val into an address pointed to by dst_ptr.
    726  * mask->exec_mask is used to figure out which bits of val
    727  * should be stored into the address
    728  * (0 means don't store this bit, 1 means do store).
    729  */
    730 static void lp_exec_mask_store(struct lp_exec_mask *mask,
    731                                struct lp_build_context *bld_store,
    732                                LLVMValueRef val,
    733                                LLVMValueRef dst_ptr)
    734 {
    735    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    736    LLVMValueRef exec_mask = mask->has_mask ? mask->exec_mask : NULL;
    737 
    738    assert(lp_check_value(bld_store->type, val));
    739    assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
    740    assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));
    741 
    742    if (exec_mask) {
    743       LLVMValueRef res, dst;
    744 
    745       dst = LLVMBuildLoad(builder, dst_ptr, "");
    746       res = lp_build_select(bld_store, exec_mask, val, dst);
    747       LLVMBuildStore(builder, res, dst_ptr);
    748    } else
    749       LLVMBuildStore(builder, val, dst_ptr);
    750 }
    751 
    752 static void lp_exec_mask_call(struct lp_exec_mask *mask,
    753                               int func,
    754                               int *pc)
    755 {
    756    if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
    757       return;
    758    }
    759 
    760    lp_exec_mask_function_init(mask, mask->function_stack_size);
    761    mask->function_stack[mask->function_stack_size].pc = *pc;
    762    mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
    763    mask->function_stack_size++;
    764    *pc = func;
    765 }
    766 
/*
 * Handle RET: a return at top level of main() ends translation; any
 * other return disables the executing channels via the return mask.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in a if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   /* Disable the channels that executed the return. */
   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}
    801 
/* BGNSUB marker -- intentionally a no-op; the call-frame setup is done
 * in lp_exec_mask_call() at the call site instead. */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
    805 
/*
 * Handle ENDSUB: pop the current function frame and restore the
 * caller's pc and return mask that lp_exec_mask_call() saved.
 */
static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   struct function_ctx *ctx;

   assert(mask->function_stack_size > 1);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);

   /* Fetch the top frame before shrinking the stack. */
   ctx = func_ctx(mask);
   mask->function_stack_size--;

   *pc = ctx->pc;
   mask->ret_mask = ctx->ret_mask;

   lp_exec_mask_update(mask);
}
    821 
    822 
    823 static LLVMValueRef
    824 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
    825              unsigned file,
    826              int index,
    827              unsigned chan)
    828 {
    829    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    830    LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
    831    LLVMValueRef var_of_array;
    832 
    833    switch (file) {
    834    case TGSI_FILE_TEMPORARY:
    835       array_of_vars = bld->temps;
    836       var_of_array = bld->temps_array;
    837       break;
    838    case TGSI_FILE_OUTPUT:
    839       array_of_vars = bld->outputs;
    840       var_of_array = bld->outputs_array;
    841       break;
    842    default:
    843       assert(0);
    844       return NULL;
    845    }
    846 
    847    assert(chan < 4);
    848 
    849    if (bld->indirect_files & (1 << file)) {
    850       LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
    851       return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
    852    }
    853    else {
    854       assert(index <= bld->bld_base.info->file_max[file]);
    855       return array_of_vars[index][chan];
    856    }
    857 }
    858 
    859 
    860 /**
    861  * Return pointer to a temporary register channel (src or dest).
    862  * Note that indirect addressing cannot be handled here.
    863  * \param index  which temporary register
    864  * \param chan  which channel of the temp register.
    865  */
    866 LLVMValueRef
    867 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
    868              unsigned index,
    869              unsigned chan)
    870 {
    871    return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
    872 }
    873 
    874 /**
    875  * Return pointer to a output register channel (src or dest).
    876  * Note that indirect addressing cannot be handled here.
    877  * \param index  which output register
    878  * \param chan  which channel of the output register.
    879  */
    880 LLVMValueRef
    881 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
    882                unsigned index,
    883                unsigned chan)
    884 {
    885    return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
    886 }
    887 
    888 /*
    889  * If we have indirect addressing in outputs copy our alloca array
    890  * to the outputs slots specified by the caller to make sure
    891  * our outputs are delivered consistently via the same interface.
    892  */
    893 static void
    894 gather_outputs(struct lp_build_tgsi_soa_context * bld)
    895 {
    896    if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
    897       unsigned index, chan;
    898       assert(bld->bld_base.info->num_outputs <=
    899              bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
    900       for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
    901          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
    902             bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
    903          }
    904       }
    905    }
    906 }
    907 
    908 /**
    909  * Gather vector.
    910  * XXX the lp_build_gather() function should be capable of doing this
    911  * with a little work.
    912  */
    913 static LLVMValueRef
    914 build_gather(struct lp_build_tgsi_context *bld_base,
    915              LLVMValueRef base_ptr,
    916              LLVMValueRef indexes,
    917              LLVMValueRef overflow_mask,
    918              LLVMValueRef indexes2)
    919 {
    920    struct gallivm_state *gallivm = bld_base->base.gallivm;
    921    LLVMBuilderRef builder = gallivm->builder;
    922    struct lp_build_context *uint_bld = &bld_base->uint_bld;
    923    struct lp_build_context *bld = &bld_base->base;
    924    LLVMValueRef res;
    925    unsigned i;
    926 
    927    if (indexes2)
    928       res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
    929    else
    930       res = bld->undef;
    931    /*
    932     * overflow_mask is a vector telling us which channels
    933     * in the vector overflowed. We use the overflow behavior for
    934     * constant buffers which is defined as:
    935     * Out of bounds access to constant buffer returns 0 in all
    936     * components. Out of bounds behavior is always with respect
    937     * to the size of the buffer bound at that slot.
    938     */
    939 
    940    if (overflow_mask) {
    941       /*
    942        * We avoid per-element control flow here (also due to llvm going crazy,
    943        * though I suspect it's better anyway since overflow is likely rare).
    944        * Note that since we still fetch from buffers even if num_elements was
    945        * zero (in this case we'll fetch from index zero) the jit func callers
    946        * MUST provide valid fake constant buffers of size 4x32 (the values do
    947        * not matter), otherwise we'd still need (not per element though)
    948        * control flow.
    949        */
    950       indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
    951       if (indexes2)
    952          indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
    953    }
    954 
    955    /*
    956     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    957     */
    958    for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
    959       LLVMValueRef si, di;
    960       LLVMValueRef index;
    961       LLVMValueRef scalar_ptr, scalar;
    962 
    963       di = lp_build_const_int32(bld->gallivm, i);
    964       if (indexes2)
    965          si = lp_build_const_int32(bld->gallivm, i >> 1);
    966       else
    967          si = di;
    968 
    969       if (indexes2 && (i & 1)) {
    970          index = LLVMBuildExtractElement(builder,
    971                                          indexes2, si, "");
    972       } else {
    973          index = LLVMBuildExtractElement(builder,
    974                                          indexes, si, "");
    975       }
    976       scalar_ptr = LLVMBuildGEP(builder, base_ptr,
    977                                 &index, 1, "gather_ptr");
    978       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
    979 
    980       res = LLVMBuildInsertElement(builder, res, scalar, di, "");
    981    }
    982 
    983    if (overflow_mask) {
    984       if (indexes2) {
    985          res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
    986          overflow_mask = LLVMBuildSExt(builder, overflow_mask,
    987                                        bld_base->dbl_bld.int_vec_type, "");
    988          res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
    989                                bld_base->dbl_bld.zero, res);
    990       } else
    991          res = lp_build_select(bld, overflow_mask, bld->zero, res);
    992    }
    993 
    994    return res;
    995 }
    996 
    997 
    998 /**
    999  * Scatter/store vector.
   1000  */
   1001 static void
   1002 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
   1003                   LLVMValueRef base_ptr,
   1004                   LLVMValueRef indexes,
   1005                   LLVMValueRef values,
   1006                   struct lp_exec_mask *mask)
   1007 {
   1008    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   1009    LLVMBuilderRef builder = gallivm->builder;
   1010    unsigned i;
   1011    LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
   1012 
   1013    /*
   1014     * Loop over elements of index_vec, store scalar value.
   1015     */
   1016    for (i = 0; i < bld->bld_base.base.type.length; i++) {
   1017       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
   1018       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
   1019       LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
   1020       LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
   1021       LLVMValueRef scalar_pred = pred ?
   1022          LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
   1023 
   1024       if (0)
   1025          lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
   1026                          ii, val, index, scalar_ptr);
   1027 
   1028       if (scalar_pred) {
   1029          LLVMValueRef real_val, dst_val;
   1030          dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
   1031          real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
   1032          LLVMBuildStore(builder, real_val, scalar_ptr);
   1033       }
   1034       else {
   1035          LLVMBuildStore(builder, val, scalar_ptr);
   1036       }
   1037    }
   1038 }
   1039 
   1040 
   1041 /**
   1042  * Read the current value of the ADDR register, convert the floats to
   1043  * ints, add the base index and return the vector of offsets.
   1044  * The offsets will be used to index into the constant buffer or
   1045  * temporary register file.
   1046  */
   1047 static LLVMValueRef
   1048 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
   1049                    unsigned reg_file, unsigned reg_index,
   1050                    const struct tgsi_ind_register *indirect_reg)
   1051 {
   1052    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   1053    struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   1054    /* always use X component of address register */
   1055    unsigned swizzle = indirect_reg->Swizzle;
   1056    LLVMValueRef base;
   1057    LLVMValueRef rel;
   1058    LLVMValueRef max_index;
   1059    LLVMValueRef index;
   1060 
   1061    assert(bld->indirect_files & (1 << reg_file));
   1062 
   1063    base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
   1064 
   1065    assert(swizzle < 4);
   1066    switch (indirect_reg->File) {
   1067    case TGSI_FILE_ADDRESS:
   1068       rel = LLVMBuildLoad(builder,
   1069                           bld->addr[indirect_reg->Index][swizzle],
   1070                           "load addr reg");
   1071       /* ADDR LLVM values already have LLVM integer type. */
   1072       break;
   1073    case TGSI_FILE_TEMPORARY:
   1074       rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
   1075       rel = LLVMBuildLoad(builder, rel, "load temp reg");
   1076       /* TEMP LLVM values always have LLVM float type, but for indirection, the
   1077        * value actually stored is expected to be an integer */
   1078       rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
   1079       break;
   1080    default:
   1081       assert(0);
   1082       rel = uint_bld->zero;
   1083    }
   1084 
   1085    index = lp_build_add(uint_bld, base, rel);
   1086 
   1087    /*
   1088     * emit_fetch_constant handles constant buffer overflow so this code
   1089     * is pointless for them.
   1090     * Furthermore the D3D10 spec in section 6.5 says:
   1091     * If the constant buffer bound to a slot is larger than the size
   1092     * declared in the shader for that slot, implementations are allowed
   1093     * to return incorrect data (not necessarily 0) for indices that are
   1094     * larger than the declared size but smaller than the buffer size.
   1095     */
   1096    if (reg_file != TGSI_FILE_CONSTANT) {
   1097       max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
   1098                                          uint_bld->type,
   1099                                          bld->bld_base.info->file_max[reg_file]);
   1100 
   1101       assert(!uint_bld->type.sign);
   1102       index = lp_build_min(uint_bld, index, max_index);
   1103    }
   1104 
   1105    return index;
   1106 }
   1107 
   1108 static struct lp_build_context *
   1109 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
   1110 	       enum tgsi_opcode_type stype)
   1111 {
   1112    struct lp_build_context *bld_fetch;
   1113 
   1114    switch (stype) {
   1115    case TGSI_TYPE_FLOAT:
   1116    case TGSI_TYPE_UNTYPED:
   1117       bld_fetch = &bld_base->base;
   1118       break;
   1119    case TGSI_TYPE_UNSIGNED:
   1120       bld_fetch = &bld_base->uint_bld;
   1121       break;
   1122    case TGSI_TYPE_SIGNED:
   1123       bld_fetch = &bld_base->int_bld;
   1124       break;
   1125    case TGSI_TYPE_DOUBLE:
   1126       bld_fetch = &bld_base->dbl_bld;
   1127       break;
   1128    case TGSI_TYPE_UNSIGNED64:
   1129       bld_fetch = &bld_base->uint64_bld;
   1130       break;
   1131    case TGSI_TYPE_SIGNED64:
   1132       bld_fetch = &bld_base->int64_bld;
   1133       break;
   1134    case TGSI_TYPE_VOID:
   1135    default:
   1136       assert(0);
   1137       bld_fetch = NULL;
   1138       break;
   1139    }
   1140    return bld_fetch;
   1141 }
   1142 
   1143 static LLVMValueRef
   1144 get_soa_array_offsets(struct lp_build_context *uint_bld,
   1145                       LLVMValueRef indirect_index,
   1146                       unsigned chan_index,
   1147                       boolean need_perelement_offset)
   1148 {
   1149    struct gallivm_state *gallivm = uint_bld->gallivm;
   1150    LLVMValueRef chan_vec =
   1151       lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
   1152    LLVMValueRef length_vec =
   1153       lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
   1154    LLVMValueRef index_vec;
   1155 
   1156    /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
   1157    index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
   1158    index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
   1159    index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
   1160 
   1161    if (need_perelement_offset) {
   1162       LLVMValueRef pixel_offsets;
   1163       unsigned i;
   1164      /* build pixel offset vector: {0, 1, 2, 3, ...} */
   1165       pixel_offsets = uint_bld->undef;
   1166       for (i = 0; i < uint_bld->type.length; i++) {
   1167          LLVMValueRef ii = lp_build_const_int32(gallivm, i);
   1168          pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
   1169                                                 ii, ii, "");
   1170       }
   1171       index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
   1172    }
   1173    return index_vec;
   1174 }
   1175 
/**
 * Fetch one channel of a constant-buffer value.
 * Indirect fetches gather per-lane and clamp overflowing lanes to zero
 * (D3D10 constant-buffer out-of-bounds behavior); direct fetches load
 * one scalar and broadcast it.  64-bit types read two adjacent channels.
 * \param reg  the source register (dimension selects the const buffer)
 * \param stype  the TGSI type of the value to fetch
 * \param swizzle  which channel to fetch
 */
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0u);

   /* 2D constants: the dimension index selects which buffer slot. */
   if (reg->Register.Dimension) {
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;
      LLVMValueRef index_vec2 = NULL;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      /* 64-bit values span two channels; build indices for the second. */
      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef swizzle_vec2;
         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1);
         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
      }
      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
   }
   else {
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;
      struct lp_build_context *bld_broad = &bld_base->base;
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");
      /* For 64-bit types, reinterpret the pointer so one load pulls
       * both 32-bit halves, and broadcast in the matching context. */
      if (stype == TGSI_TYPE_DOUBLE) {
         LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
         bld_broad = &bld_base->dbl_bld;
      } else if (stype == TGSI_TYPE_UNSIGNED64) {
         LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
         bld_broad = &bld_base->uint64_bld;
      } else if (stype == TGSI_TYPE_SIGNED64) {
         LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
         bld_broad = &bld_base->int64_bld;
      }
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
      res = lp_build_broadcast_scalar(bld_broad, scalar);
   }

   /* Reinterpret the (float-typed) result as the requested type. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
   1271 
   1272 /**
   1273  * Fetch 64-bit values from two separate channels.
   1274  * 64-bit values are stored split across two channels, like xy and zw.
   1275  * This function creates a set of vec_length*2 floats,
   1276  * extracts the values from the two channels,
   1277  * puts them in the correct place, then casts to vec_length 64-bits.
   1278  */
   1279 static LLVMValueRef
   1280 emit_fetch_64bit(
   1281    struct lp_build_tgsi_context * bld_base,
   1282    enum tgsi_opcode_type stype,
   1283    LLVMValueRef input,
   1284    LLVMValueRef input2)
   1285 {
   1286    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1287    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   1288    LLVMBuilderRef builder = gallivm->builder;
   1289    LLVMValueRef res;
   1290    struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
   1291    int i;
   1292    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   1293    int len = bld_base->base.type.length * 2;
   1294    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
   1295 
   1296    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
   1297       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
   1298       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
   1299    }
   1300    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
   1301 
   1302    return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   1303 }
   1304 
/**
 * Fetch one channel of an immediate value.
 * Immediates either live in LLVM values (bld->immediates) or, when
 * indirectly addressed or forced into an array, in an alloca array
 * that is gathered/loaded from.  64-bit types read two adjacent
 * channels and combine them.
 * \param reg  the source register
 * \param stype  the TGSI type of the value to fetch
 * \param swizzle  which channel to fetch
 */
static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec;  /* index into the immediate register array */
         LLVMValueRef index_vec2 = NULL;
         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);
         /* second channel's offsets for the high half of 64-bit values */
         if (tgsi_type_is_64bit(stype))
            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                              indirect_index,
                                              swizzle + 1,
                                              FALSE);
         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
      } else {
         /* Direct array access: element offset = index * 4 + swizzle. */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                        reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr =  LLVMBuildGEP(builder,
                                                bld->imms_array, &lindex, 1, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");

         if (tgsi_type_is_64bit(stype)) {
            /* load the adjacent channel and merge the two 32-bit halves */
            LLVMValueRef lindex1;
            LLVMValueRef imms_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle + 1);
            imms_ptr2 = LLVMBuildGEP(builder,
                                      bld->imms_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
   }
   else {
      /* Immediates kept directly as LLVM values. */
      res = bld->immediates[reg->Register.Index][swizzle];
      if (tgsi_type_is_64bit(stype))
         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
   }

   /* Reinterpret the (float-typed) result as the requested type. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}
   1383 
/**
 * Fetch one channel of a shader input register.
 * Inputs either live in LLVM values (bld->inputs) or, when the input
 * file is indirectly addressed, in a flat alloca array that is
 * gathered/loaded from.  64-bit types read two adjacent channels and
 * combine them.
 * \param reg  the source register
 * \param stype  the TGSI type of the value to fetch
 * \param swizzle  which channel to fetch
 */
static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec;  /* index into the input reg array */
      LLVMValueRef index_vec2 = NULL;
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* per-lane offsets, since inputs differ per SIMD element */
      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      /* second channel's offsets for the high half of 64-bit values */
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle + 1,
                                           TRUE);
      }
      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         /* Inputs live in the flat array even for direct access. */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                        reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");

         res = LLVMBuildLoad(builder, input_ptr, "");
         if (tgsi_type_is_64bit(stype)) {
            /* load the adjacent channel and merge the two 32-bit halves */
            LLVMValueRef lindex1;
            LLVMValueRef input_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle + 1);
            input_ptr2 = LLVMBuildGEP(builder,
                                      bld->inputs_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, input_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
      else {
         /* Inputs kept directly as LLVM values. */
         res = bld->inputs[reg->Register.Index][swizzle];
         if (tgsi_type_is_64bit(stype))
            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
      }
   }

   assert(res);

   /* Reinterpret the (float-typed) result as the requested type. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
   1461 
   1462 
   1463 static LLVMValueRef
   1464 emit_fetch_gs_input(
   1465    struct lp_build_tgsi_context * bld_base,
   1466    const struct tgsi_full_src_register * reg,
   1467    enum tgsi_opcode_type stype,
   1468    unsigned swizzle)
   1469 {
   1470    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1471    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   1472    const struct tgsi_shader_info *info = bld->bld_base.info;
   1473    LLVMBuilderRef builder = gallivm->builder;
   1474    LLVMValueRef attrib_index = NULL;
   1475    LLVMValueRef vertex_index = NULL;
   1476    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   1477    LLVMValueRef res;
   1478 
   1479    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
   1480       /* This is really a system value not a regular input */
   1481       assert(!reg->Register.Indirect);
   1482       assert(!reg->Dimension.Indirect);
   1483       res = bld->system_values.prim_id;
   1484       if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
   1485          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
   1486       }
   1487       return res;
   1488    }
   1489 
   1490    if (reg->Register.Indirect) {
   1491       attrib_index = get_indirect_index(bld,
   1492                                         reg->Register.File,
   1493                                         reg->Register.Index,
   1494                                         &reg->Indirect);
   1495    } else {
   1496       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   1497    }
   1498 
   1499    if (reg->Dimension.Indirect) {
   1500       vertex_index = get_indirect_index(bld,
   1501                                         reg->Register.File,
   1502                                         reg->Dimension.Index,
   1503                                         &reg->DimIndirect);
   1504    } else {
   1505       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   1506    }
   1507 
   1508    res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
   1509                                     reg->Dimension.Indirect,
   1510                                     vertex_index,
   1511                                     reg->Register.Indirect,
   1512                                     attrib_index,
   1513                                     swizzle_index);
   1514 
   1515    assert(res);
   1516    if (tgsi_type_is_64bit(stype)) {
   1517       LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle + 1);
   1518       LLVMValueRef res2;
   1519       res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
   1520                                         reg->Dimension.Indirect,
   1521                                         vertex_index,
   1522                                         reg->Register.Indirect,
   1523                                         attrib_index,
   1524                                         swizzle_index);
   1525       assert(res2);
   1526       res = emit_fetch_64bit(bld_base, stype, res, res2);
   1527    } else if (stype == TGSI_TYPE_UNSIGNED) {
   1528       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   1529    } else if (stype == TGSI_TYPE_SIGNED) {
   1530       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   1531    }
   1532 
   1533    return res;
   1534 }
   1535 
   1536 static LLVMValueRef
   1537 emit_fetch_temporary(
   1538    struct lp_build_tgsi_context * bld_base,
   1539    const struct tgsi_full_src_register * reg,
   1540    enum tgsi_opcode_type stype,
   1541    unsigned swizzle)
   1542 {
   1543    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1544    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   1545    LLVMBuilderRef builder = gallivm->builder;
   1546    LLVMValueRef res;
   1547 
   1548    if (reg->Register.Indirect) {
   1549       LLVMValueRef indirect_index;
   1550       LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
   1551       LLVMValueRef temps_array;
   1552       LLVMTypeRef fptr_type;
   1553 
   1554       indirect_index = get_indirect_index(bld,
   1555                                           reg->Register.File,
   1556                                           reg->Register.Index,
   1557                                           &reg->Indirect);
   1558 
   1559       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
   1560                                         indirect_index,
   1561                                         swizzle,
   1562                                         TRUE);
   1563       if (tgsi_type_is_64bit(stype)) {
   1564                index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
   1565                                                   indirect_index,
   1566                                                   swizzle + 1,
   1567                                                   TRUE);
   1568       }
   1569 
   1570       /* cast temps_array pointer to float* */
   1571       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
   1572       temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
   1573 
   1574       /* Gather values from the temporary register array */
   1575       res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
   1576    }
   1577    else {
   1578       LLVMValueRef temp_ptr;
   1579       temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
   1580       res = LLVMBuildLoad(builder, temp_ptr, "");
   1581 
   1582       if (tgsi_type_is_64bit(stype)) {
   1583          LLVMValueRef temp_ptr2, res2;
   1584 
   1585          temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
   1586          res2 = LLVMBuildLoad(builder, temp_ptr2, "");
   1587          res = emit_fetch_64bit(bld_base, stype, res, res2);
   1588       }
   1589    }
   1590 
   1591    if (stype == TGSI_TYPE_SIGNED ||
   1592        stype == TGSI_TYPE_UNSIGNED ||
   1593        stype == TGSI_TYPE_DOUBLE ||
   1594        stype == TGSI_TYPE_SIGNED64 ||
   1595        stype == TGSI_TYPE_UNSIGNED64) {
   1596       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
   1597       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   1598    }
   1599 
   1600    return res;
   1601 }
   1602 
   1603 static LLVMValueRef
   1604 emit_fetch_system_value(
   1605    struct lp_build_tgsi_context * bld_base,
   1606    const struct tgsi_full_src_register * reg,
   1607    enum tgsi_opcode_type stype,
   1608    unsigned swizzle)
   1609 {
   1610    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1611    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   1612    const struct tgsi_shader_info *info = bld->bld_base.info;
   1613    LLVMBuilderRef builder = gallivm->builder;
   1614    LLVMValueRef res;
   1615    enum tgsi_opcode_type atype; // Actual type of the value
   1616 
   1617    assert(!reg->Register.Indirect);
   1618 
   1619    switch (info->system_value_semantic_name[reg->Register.Index]) {
   1620    case TGSI_SEMANTIC_INSTANCEID:
   1621       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
   1622       atype = TGSI_TYPE_UNSIGNED;
   1623       break;
   1624 
   1625    case TGSI_SEMANTIC_VERTEXID:
   1626       res = bld->system_values.vertex_id;
   1627       atype = TGSI_TYPE_UNSIGNED;
   1628       break;
   1629 
   1630    case TGSI_SEMANTIC_VERTEXID_NOBASE:
   1631       res = bld->system_values.vertex_id_nobase;
   1632       atype = TGSI_TYPE_UNSIGNED;
   1633       break;
   1634 
   1635    case TGSI_SEMANTIC_BASEVERTEX:
   1636       res = bld->system_values.basevertex;
   1637       atype = TGSI_TYPE_UNSIGNED;
   1638       break;
   1639 
   1640    case TGSI_SEMANTIC_PRIMID:
   1641       res = bld->system_values.prim_id;
   1642       atype = TGSI_TYPE_UNSIGNED;
   1643       break;
   1644 
   1645    case TGSI_SEMANTIC_INVOCATIONID:
   1646       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
   1647       atype = TGSI_TYPE_UNSIGNED;
   1648       break;
   1649 
   1650    default:
   1651       assert(!"unexpected semantic in emit_fetch_system_value");
   1652       res = bld_base->base.zero;
   1653       atype = TGSI_TYPE_FLOAT;
   1654       break;
   1655    }
   1656 
   1657    if (atype != stype) {
   1658       if (stype == TGSI_TYPE_FLOAT) {
   1659          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
   1660       } else if (stype == TGSI_TYPE_UNSIGNED) {
   1661          res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   1662       } else if (stype == TGSI_TYPE_SIGNED) {
   1663          res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   1664       }
   1665    }
   1666 
   1667    return res;
   1668 }
   1669 
   1670 /**
   1671  * Register fetch with derivatives.
   1672  */
   1673 static void
   1674 emit_fetch_deriv(
   1675    struct lp_build_tgsi_soa_context *bld,
   1676    LLVMValueRef src,
   1677    LLVMValueRef *res,
   1678    LLVMValueRef *ddx,
   1679    LLVMValueRef *ddy)
   1680 {
   1681    if (res)
   1682       *res = src;
   1683 
   1684    /* TODO: use interpolation coeffs for inputs */
   1685 
   1686    if (ddx)
   1687       *ddx = lp_build_ddx(&bld->bld_base.base, src);
   1688 
   1689    if (ddy)
   1690       *ddy = lp_build_ddy(&bld->bld_base.base, src);
   1691 }
   1692 
   1693 /**
   1694  * store an array of vec-length 64-bit into two arrays of vec_length floats
   1695  * i.e.
   1696  * value is d0, d1, d2, d3 etc.
   1697  * each 64-bit has high and low pieces x, y
   1698  * so gets stored into the separate channels as:
   1699  * chan_ptr = d0.x, d1.x, d2.x, d3.x
   1700  * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
   1701  */
   1702 static void
   1703 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
   1704                       LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
   1705                       LLVMValueRef value)
   1706 {
   1707    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1708    struct gallivm_state *gallivm = bld_base->base.gallivm;
   1709    LLVMBuilderRef builder = gallivm->builder;
   1710    struct lp_build_context *float_bld = &bld_base->base;
   1711    unsigned i;
   1712    LLVMValueRef temp, temp2;
   1713    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   1714    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   1715 
   1716    for (i = 0; i < bld_base->base.type.length; i++) {
   1717       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
   1718       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
   1719    }
   1720 
   1721    temp = LLVMBuildShuffleVector(builder, value,
   1722                                  LLVMGetUndef(LLVMTypeOf(value)),
   1723                                  LLVMConstVector(shuffles,
   1724                                                  bld_base->base.type.length),
   1725                                  "");
   1726    temp2 = LLVMBuildShuffleVector(builder, value,
   1727                                   LLVMGetUndef(LLVMTypeOf(value)),
   1728                                   LLVMConstVector(shuffles2,
   1729                                                   bld_base->base.type.length),
   1730                                   "");
   1731 
   1732    lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
   1733    lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
   1734 }
   1735 
   1736 /**
   1737  * Register store.
   1738  */
   1739 static void
   1740 emit_store_chan(
   1741    struct lp_build_tgsi_context *bld_base,
   1742    const struct tgsi_full_instruction *inst,
   1743    unsigned index,
   1744    unsigned chan_index,
   1745    LLVMValueRef value)
   1746 {
   1747    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1748    struct gallivm_state *gallivm = bld_base->base.gallivm;
   1749    LLVMBuilderRef builder = gallivm->builder;
   1750    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   1751    struct lp_build_context *float_bld = &bld_base->base;
   1752    struct lp_build_context *int_bld = &bld_base->int_bld;
   1753    LLVMValueRef indirect_index = NULL;
   1754    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
   1755 
   1756    /*
   1757     * Apply saturation.
   1758     *
   1759     * It is always assumed to be float.
   1760     */
   1761    if (inst->Instruction.Saturate) {
   1762       assert(dtype == TGSI_TYPE_FLOAT ||
   1763              dtype == TGSI_TYPE_UNTYPED);
   1764       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
   1765       value = lp_build_clamp_zero_one_nanzero(float_bld, value);
   1766    }
   1767 
   1768    if (reg->Register.Indirect) {
   1769       /*
   1770        * Currently the mesa/st doesn't generate indirect stores
   1771        * to 64-bit values, it normally uses MOV to do indirect stores.
   1772        */
   1773       assert(!tgsi_type_is_64bit(dtype));
   1774       indirect_index = get_indirect_index(bld,
   1775                                           reg->Register.File,
   1776                                           reg->Register.Index,
   1777                                           &reg->Indirect);
   1778    } else {
   1779       assert(reg->Register.Index <=
   1780                              bld_base->info->file_max[reg->Register.File]);
   1781    }
   1782 
   1783    if (DEBUG_EXECUTION) {
   1784       emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
   1785    }
   1786 
   1787    switch( reg->Register.File ) {
   1788    case TGSI_FILE_OUTPUT:
   1789       /* Outputs are always stored as floats */
   1790       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
   1791 
   1792       if (reg->Register.Indirect) {
   1793          LLVMValueRef index_vec;  /* indexes into the output registers */
   1794          LLVMValueRef outputs_array;
   1795          LLVMTypeRef fptr_type;
   1796 
   1797          index_vec = get_soa_array_offsets(&bld_base->uint_bld,
   1798                                            indirect_index,
   1799                                            chan_index,
   1800                                            TRUE);
   1801 
   1802          fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
   1803          outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
   1804 
   1805          /* Scatter store values into output registers */
   1806          emit_mask_scatter(bld, outputs_array, index_vec, value,
   1807                            &bld->exec_mask);
   1808       }
   1809       else {
   1810          LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
   1811                                                   chan_index);
   1812 
   1813          if (tgsi_type_is_64bit(dtype)) {
   1814             LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
   1815                                                       chan_index + 1);
   1816             emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
   1817                                   value);
   1818          } else
   1819             lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
   1820       }
   1821       break;
   1822 
   1823    case TGSI_FILE_TEMPORARY:
   1824       /* Temporaries are always stored as floats */
   1825       if (!tgsi_type_is_64bit(dtype))
   1826          value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
   1827       else
   1828          value = LLVMBuildBitCast(builder, value,  LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
   1829 
   1830       if (reg->Register.Indirect) {
   1831          LLVMValueRef index_vec;  /* indexes into the temp registers */
   1832          LLVMValueRef temps_array;
   1833          LLVMTypeRef fptr_type;
   1834 
   1835          index_vec = get_soa_array_offsets(&bld_base->uint_bld,
   1836                                            indirect_index,
   1837                                            chan_index,
   1838                                            TRUE);
   1839 
   1840          fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
   1841          temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
   1842 
   1843          /* Scatter store values into temp registers */
   1844          emit_mask_scatter(bld, temps_array, index_vec, value,
   1845                            &bld->exec_mask);
   1846       }
   1847       else {
   1848          LLVMValueRef temp_ptr;
   1849          temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
   1850 
   1851          if (tgsi_type_is_64bit(dtype)) {
   1852             LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
   1853                                                          reg->Register.Index,
   1854                                                          chan_index + 1);
   1855             emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
   1856                                   value);
   1857          }
   1858          else
   1859             lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
   1860       }
   1861       break;
   1862 
   1863    case TGSI_FILE_ADDRESS:
   1864       assert(dtype == TGSI_TYPE_SIGNED);
   1865       assert(LLVMTypeOf(value) == int_bld->vec_type);
   1866       value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
   1867       lp_exec_mask_store(&bld->exec_mask, int_bld, value,
   1868                          bld->addr[reg->Register.Index][chan_index]);
   1869       break;
   1870 
   1871    default:
   1872       assert( 0 );
   1873    }
   1874 
   1875    (void)dtype;
   1876 }
   1877 
   1878 /*
   1879  * Called at the beginning of the translation of each TGSI instruction, to
   1880  * emit some debug code.
   1881  */
   1882 static void
   1883 emit_debug(
   1884    struct lp_build_tgsi_context * bld_base,
   1885    const struct tgsi_full_instruction * inst,
   1886    const struct tgsi_opcode_info * info)
   1887 
   1888 {
   1889    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1890 
   1891    if (DEBUG_EXECUTION) {
   1892       /*
   1893        * Dump the TGSI instruction.
   1894        */
   1895 
   1896       struct gallivm_state *gallivm = bld_base->base.gallivm;
   1897       char buf[512];
   1898       buf[0] = '$';
   1899       buf[1] = ' ';
   1900       tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
   1901       lp_build_printf(gallivm, buf);
   1902 
   1903       /* Dump the execution mask.
   1904        */
   1905       if (bld->exec_mask.has_mask) {
   1906          lp_build_print_value(gallivm, "    mask = ", bld->exec_mask.exec_mask);
   1907       }
   1908    }
   1909 }
   1910 
   1911 static void
   1912 emit_store(
   1913    struct lp_build_tgsi_context * bld_base,
   1914    const struct tgsi_full_instruction * inst,
   1915    const struct tgsi_opcode_info * info,
   1916    unsigned index,
   1917    LLVMValueRef dst[4])
   1918 
   1919 {
   1920    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
   1921 
   1922    unsigned writemask = inst->Dst[index].Register.WriteMask;
   1923    while (writemask) {
   1924       unsigned chan_index = u_bit_scan(&writemask);
   1925       if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
   1926           continue;
   1927       emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
   1928    }
   1929 }
   1930 
   1931 static unsigned
   1932 tgsi_to_pipe_tex_target(unsigned tgsi_target)
   1933 {
   1934    switch (tgsi_target) {
   1935    case TGSI_TEXTURE_BUFFER:
   1936       return PIPE_BUFFER;
   1937    case TGSI_TEXTURE_1D:
   1938    case TGSI_TEXTURE_SHADOW1D:
   1939       return PIPE_TEXTURE_1D;
   1940    case TGSI_TEXTURE_2D:
   1941    case TGSI_TEXTURE_SHADOW2D:
   1942    case TGSI_TEXTURE_2D_MSAA:
   1943       return PIPE_TEXTURE_2D;
   1944    case TGSI_TEXTURE_3D:
   1945       return PIPE_TEXTURE_3D;
   1946    case TGSI_TEXTURE_CUBE:
   1947    case TGSI_TEXTURE_SHADOWCUBE:
   1948       return PIPE_TEXTURE_CUBE;
   1949    case TGSI_TEXTURE_RECT:
   1950    case TGSI_TEXTURE_SHADOWRECT:
   1951       return PIPE_TEXTURE_RECT;
   1952    case TGSI_TEXTURE_1D_ARRAY:
   1953    case TGSI_TEXTURE_SHADOW1D_ARRAY:
   1954       return PIPE_TEXTURE_1D_ARRAY;
   1955    case TGSI_TEXTURE_2D_ARRAY:
   1956    case TGSI_TEXTURE_SHADOW2D_ARRAY:
   1957    case TGSI_TEXTURE_2D_ARRAY_MSAA:
   1958       return PIPE_TEXTURE_2D_ARRAY;
   1959    case TGSI_TEXTURE_CUBE_ARRAY:
   1960    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
   1961       return PIPE_TEXTURE_CUBE_ARRAY;
   1962    default:
   1963       assert(0);
   1964       return PIPE_BUFFER;
   1965    }
   1966 }
   1967 
   1968 
   1969 static enum lp_sampler_lod_property
   1970 lp_build_lod_property(
   1971    struct lp_build_tgsi_context *bld_base,
   1972    const struct tgsi_full_instruction *inst,
   1973    unsigned src_op)
   1974 {
   1975    const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   1976    enum lp_sampler_lod_property lod_property;
   1977 
   1978    /*
   1979     * Not much we can do here. We could try catching inputs declared
   1980     * with constant interpolation but not sure it's worth it - since for
   1981     * TEX opcodes as well as FETCH/LD the lod comes from same reg as
   1982     * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
   1983     * like the constant/immediate recognition below.
   1984     * What seems to be of more value would be to recognize temps holding
   1985     * broadcasted scalars but no way we can do it.
   1986     * Tried asking llvm but without any success (using LLVMIsConstant
   1987     * even though this isn't exactly what we'd need), even as simple as
   1988     * IMM[0] UINT32 (0,-1,0,0)
   1989     * MOV TEMP[0] IMM[0].yyyy
   1990     * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
   1991     * doesn't work.
   1992     * This means there's ZERO chance this will ever catch a scalar lod
   1993     * with traditional tex opcodes as well as texel fetches, since the lod
   1994     * comes from the same reg as coords (except some test shaders using
   1995     * constant coords maybe).
   1996     * There's at least hope for sample opcodes as well as size queries.
   1997     */
   1998    if (reg->Register.File == TGSI_FILE_CONSTANT ||
   1999        reg->Register.File == TGSI_FILE_IMMEDIATE) {
   2000       lod_property = LP_SAMPLER_LOD_SCALAR;
   2001    }
   2002    else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
   2003       if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
   2004          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   2005       }
   2006       else {
   2007          lod_property = LP_SAMPLER_LOD_PER_QUAD;
   2008       }
   2009    }
   2010    else {
   2011       /* never use scalar (per-quad) lod the results are just too wrong. */
   2012       lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   2013    }
   2014    return lod_property;
   2015 }
   2016 
   2017 
   2018 /**
   2019  * High-level instruction translators.
   2020  */
   2021 
/**
 * Emit code for a traditional TEX-style opcode (TEX, TXB, TXL, TXD,
 * TXP, ...), assembling coordinates, lod, derivatives and offsets into
 * a lp_sampler_params and handing it to the sampler code generator.
 *
 * \param modifier     which lod/bias/projection/derivative variant
 * \param texel        receives the 4 result channels
 * \param sampler_reg  which Src register holds the sampler unit
 * \param sampler_op   sample vs. gather etc., goes into the sample key
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel,
          unsigned sampler_reg,
          enum lp_sampler_op_type sampler_op)
{
   /* traditional opcodes use the same index for texture and sampler unit */
   unsigned unit = inst->Src[sampler_reg].Register.Index;
   LLVMValueRef oow = NULL;              /* 1/w for projective texturing */
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned num_derivs, num_offsets, i;
   unsigned shadow_coord = 0;            /* src channel holding the shadow ref */
   unsigned layer_coord = 0;             /* src channel holding the array layer */
   unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * Determine, per target, how many coordinate/derivative components
    * exist and where the layer and shadow components live.
    */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      /* fallthrough */
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_SHADOW1D:
      shadow_coord = 2;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      layer_coord = 2;
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      shadow_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE:
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      shadow_coord = 4; /* shadow coord special different reg */
      break;
   case TGSI_TEXTURE_2D_MSAA:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
   default:
      /* MSAA targets are not handled by the TEX-style path */
      assert(0);
      return;
   }

   /* Note lod and especially projected are illegal in a LOT of cases */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
          inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
         /* note that shadow cube array with bias/explicit lod does not exist */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         /* lod shares src 0 with the coords, in the w channel */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      /* 1/w from the q coordinate, multiplied into coords below */
      oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      oow = lp_build_rcp(&bld->bld_base.base, oow);
   }

   /* Fetch the coordinate components, dividing by w when projecting. */
   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3) {
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      else {
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
   }
   /* Shadow coord occupies always 5th slot. */
   if (shadow_coord) {
      sample_key |= LP_SAMPLER_SHADOW;
      if (shadow_coord == 4) {
         /* shadow cube array: reference value lives in src 1 */
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      /* explicit derivatives come from src 1 (ddx) and src 2 (ddy) */
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   /* we don't handle the 4 offset version of tg4 */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }

   /* Package everything up and hand off to the sampler generator. */
   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   params.sampler_index = unit;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}
   2228 
/**
 * Emit code for the D3D10-style SAMPLE* opcodes (SAMPLE, SAMPLE_B,
 * SAMPLE_C, SAMPLE_C_LZ, SAMPLE_D, SAMPLE_L).
 *
 * Unlike the old-style TEX opcodes, the texture and sampler units always
 * come from src1 and src2 respectively, and the texture target is taken
 * from the declared sampler view (bld->sv[]) rather than from the
 * instruction itself.
 *
 * \param bld          SoA translation context
 * \param inst         the TGSI instruction to translate
 * \param modifier     which lod/bias/derivative variant of the opcode this is
 * \param compare      TRUE for the shadow-compare variants (SAMPLE_C*)
 * \param sample_type  op type to store in the sample key
 * \param texel        output: the four fetched texel channel vectors
 */
static void
emit_sample(struct lp_build_tgsi_soa_context *bld,
            const struct tgsi_full_instruction *inst,
            enum lp_build_tex_modifier modifier,
            boolean compare,
            enum lp_sampler_op_type sample_type,
            LLVMValueRef *texel)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   unsigned texture_unit, sampler_unit;
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   unsigned num_offsets, num_derivs, i;
   unsigned layer_coord = 0;
   unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   /* Without a sampler generator we can't emit anything useful;
    * return undefs so code generation can continue.
    */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * unlike old-style tex opcodes the texture/sampler indices
    * always come from src1 and src2 respectively.
    */
   texture_unit = inst->Src[1].Register.Index;
   sampler_unit = inst->Src[2].Register.Index;

   /*
    * Note inst->Texture.Texture will contain the number of offsets,
    * however the target information is NOT there and comes from the
    * declared sampler views instead.
    */
   switch (bld->sv[texture_unit].Resource) {
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      layer_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* For SAMPLE_B / SAMPLE_L the lod/bias value lives in src3.x. */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      /* NOTE(review): reg index 0 here is the coord register, while the
       * lod value itself was fetched from src 3 above — confirm this is
       * the intended register for the lod property analysis.
       */
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
      /* XXX might be better to explicitly pass the level zero information */
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
   }

   /* Fetch the coordinates from src0; unused slots are undef. */
   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3)
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      else
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
   }
   /* Shadow coord occupies always 5th slot. */
   if (compare) {
      sample_key |= LP_SAMPLER_SHADOW;
      coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
   }

   /* SAMPLE_D: explicit derivatives come from src3 (ddx) and src4 (ddy). */
   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }

   /* some advanced gather instructions (txgo) would require 4 offsets */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = texture_unit;
   params.sampler_index = sampler_unit;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   /* Apply the sampler-view swizzle (from src1) to the result if it is
    * anything other than identity.
    */
   if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
       inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
       inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
       inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
   2410 
/**
 * Emit code for TXF / SAMPLE_I texel fetches (unfiltered fetch with
 * integer coordinates).
 *
 * \param bld         SoA translation context
 * \param inst        the TGSI instruction to translate
 * \param texel       output: the four fetched texel channel vectors
 * \param is_samplei  TRUE for SAMPLE_I (target comes from the declared
 *                    sampler view and the src1 swizzle is applied to the
 *                    result), FALSE for TXF (target from the instruction)
 */
static void
emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                   const struct tgsi_full_instruction *inst,
                   LLVMValueRef *texel,
                   boolean is_samplei)
{
   unsigned unit, target;
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned dims, i;
   unsigned layer_coord = 0;
   unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   /* Without a sampler generator we can't emit anything useful;
    * return undefs so code generation can continue.
    */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = coord_undef;
      }
      return;
   }

   unit = inst->Src[1].Register.Index;

   if (is_samplei) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }

   /* Derive coordinate dimensionality and layer slot from the target. */
   switch (target) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_BUFFER:
      dims = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      dims = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_2D_MSAA:
      dims = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      layer_coord = 2;
      dims = 2;
      break;
   case TGSI_TEXTURE_3D:
      dims = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* always have lod except for buffers and msaa targets ? */
   if (target != TGSI_TEXTURE_BUFFER &&
       target != TGSI_TEXTURE_2D_MSAA &&
       target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      /* TXF takes the lod in the w component of src0. */
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   /*
    * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
    * would be the sample index.
    */

   for (i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
   for (i = dims; i < 5; i++) {
      coords[i] = coord_undef;
   }
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);

   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < dims; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   /*
    * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
    * and trigger some assertions with d3d10 where the sampler view number
    * can exceed this.
    */
   params.sampler_index = 0;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.derivs = NULL;
   params.lod = explicit_lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   /* For SAMPLE_I, apply the sampler-view swizzle (from src1) to the
    * result if it is anything other than identity.
    */
   if (is_samplei &&
       (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
   2541 
   2542 static void
   2543 emit_size_query( struct lp_build_tgsi_soa_context *bld,
   2544                  const struct tgsi_full_instruction *inst,
   2545                  LLVMValueRef *sizes_out,
   2546                  boolean is_sviewinfo)
   2547 {
   2548    LLVMValueRef explicit_lod;
   2549    enum lp_sampler_lod_property lod_property;
   2550    unsigned has_lod;
   2551    unsigned i;
   2552    unsigned unit = inst->Src[1].Register.Index;
   2553    unsigned target, pipe_target;
   2554    struct lp_sampler_size_query_params params;
   2555 
   2556    if (is_sviewinfo) {
   2557       target = bld->sv[unit].Resource;
   2558    }
   2559    else {
   2560       target = inst->Texture.Texture;
   2561    }
   2562    switch (target) {
   2563    case TGSI_TEXTURE_BUFFER:
   2564    case TGSI_TEXTURE_RECT:
   2565    case TGSI_TEXTURE_SHADOWRECT:
   2566       has_lod = 0;
   2567       break;
   2568    default:
   2569       has_lod = 1;
   2570       break;
   2571    }
   2572 
   2573    if (!bld->sampler) {
   2574       _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
   2575       for (i = 0; i < 4; i++)
   2576          sizes_out[i] = bld->bld_base.int_bld.undef;
   2577       return;
   2578    }
   2579 
   2580    if (has_lod) {
   2581       explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
   2582       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   2583    }
   2584    else {
   2585       explicit_lod = NULL;
   2586       lod_property = LP_SAMPLER_LOD_SCALAR;
   2587    }
   2588 
   2589 
   2590    pipe_target = tgsi_to_pipe_tex_target(target);
   2591 
   2592    params.int_type = bld->bld_base.int_bld.type;
   2593    params.texture_unit = unit;
   2594    params.target = pipe_target;
   2595    params.context_ptr = bld->context_ptr;
   2596    params.is_sviewinfo = TRUE;
   2597    params.lod_property = lod_property;
   2598    params.explicit_lod = explicit_lod;
   2599    params.sizes_out = sizes_out;
   2600 
   2601    bld->sampler->emit_size_query(bld->sampler,
   2602                                  bld->bld_base.base.gallivm,
   2603                                  &params);
   2604 }
   2605 
   2606 static boolean
   2607 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
   2608                    int pc)
   2609 {
   2610    unsigned i;
   2611 
   2612    for (i = 0; i < 5; i++) {
   2613       unsigned opcode;
   2614 
   2615       if (pc + i >= bld->bld_base.info->num_instructions)
   2616          return TRUE;
   2617 
   2618       opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
   2619 
   2620       if (opcode == TGSI_OPCODE_END)
   2621          return TRUE;
   2622 
   2623       if (opcode == TGSI_OPCODE_TEX ||
   2624          opcode == TGSI_OPCODE_TXP ||
   2625          opcode == TGSI_OPCODE_TXD ||
   2626          opcode == TGSI_OPCODE_TXB ||
   2627          opcode == TGSI_OPCODE_TXL ||
   2628          opcode == TGSI_OPCODE_TXF ||
   2629          opcode == TGSI_OPCODE_TXQ ||
   2630          opcode == TGSI_OPCODE_TEX2 ||
   2631          opcode == TGSI_OPCODE_TXB2 ||
   2632          opcode == TGSI_OPCODE_TXL2 ||
   2633          opcode == TGSI_OPCODE_SAMPLE ||
   2634          opcode == TGSI_OPCODE_SAMPLE_B ||
   2635          opcode == TGSI_OPCODE_SAMPLE_C ||
   2636          opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
   2637          opcode == TGSI_OPCODE_SAMPLE_D ||
   2638          opcode == TGSI_OPCODE_SAMPLE_I ||
   2639          opcode == TGSI_OPCODE_SAMPLE_I_MS ||
   2640          opcode == TGSI_OPCODE_SAMPLE_L ||
   2641          opcode == TGSI_OPCODE_SVIEWINFO ||
   2642          opcode == TGSI_OPCODE_CAL ||
   2643          opcode == TGSI_OPCODE_IF ||
   2644          opcode == TGSI_OPCODE_UIF ||
   2645          opcode == TGSI_OPCODE_BGNLOOP ||
   2646          opcode == TGSI_OPCODE_SWITCH)
   2647          return FALSE;
   2648    }
   2649 
   2650    return TRUE;
   2651 }
   2652 
   2653 
   2654 
   2655 /**
   2656  * Kill fragment if any of the src register values are negative.
   2657  */
   2658 static void
   2659 emit_kill_if(
   2660    struct lp_build_tgsi_soa_context *bld,
   2661    const struct tgsi_full_instruction *inst,
   2662    int pc)
   2663 {
   2664    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   2665    const struct tgsi_full_src_register *reg = &inst->Src[0];
   2666    LLVMValueRef terms[TGSI_NUM_CHANNELS];
   2667    LLVMValueRef mask;
   2668    unsigned chan_index;
   2669 
   2670    memset(&terms, 0, sizeof terms);
   2671 
   2672    TGSI_FOR_EACH_CHANNEL( chan_index ) {
   2673       unsigned swizzle;
   2674 
   2675       /* Unswizzle channel */
   2676       swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
   2677 
   2678       /* Check if the component has not been already tested. */
   2679       assert(swizzle < TGSI_NUM_CHANNELS);
   2680       if( !terms[swizzle] )
   2681          /* TODO: change the comparison operator instead of setting the sign */
   2682          terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
   2683    }
   2684 
   2685    mask = NULL;
   2686    TGSI_FOR_EACH_CHANNEL( chan_index ) {
   2687       if(terms[chan_index]) {
   2688          LLVMValueRef chan_mask;
   2689 
   2690          /*
   2691           * If term < 0 then mask = 0 else mask = ~0.
   2692           */
   2693          chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
   2694 
   2695          if(mask)
   2696             mask = LLVMBuildAnd(builder, mask, chan_mask, "");
   2697          else
   2698             mask = chan_mask;
   2699       }
   2700    }
   2701 
   2702    if (bld->exec_mask.has_mask) {
   2703       LLVMValueRef invmask;
   2704       invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
   2705       mask = LLVMBuildOr(builder, mask, invmask, "");
   2706    }
   2707 
   2708    lp_build_mask_update(bld->mask, mask);
   2709    if (!near_end_of_shader(bld, pc))
   2710       lp_build_mask_check(bld->mask);
   2711 }
   2712 
   2713 
   2714 /**
   2715  * Unconditional fragment kill.
   2716  * The only predication is the execution mask which will apply if
   2717  * we're inside a loop or conditional.
   2718  */
   2719 static void
   2720 emit_kill(struct lp_build_tgsi_soa_context *bld,
   2721           int pc)
   2722 {
   2723    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   2724    LLVMValueRef mask;
   2725 
   2726    /* For those channels which are "alive", disable fragment shader
   2727     * execution.
   2728     */
   2729    if (bld->exec_mask.has_mask) {
   2730       mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
   2731    }
   2732    else {
   2733       LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
   2734       mask = zero;
   2735    }
   2736 
   2737    lp_build_mask_update(bld->mask, mask);
   2738 
   2739    if (!near_end_of_shader(bld, pc))
   2740       lp_build_mask_check(bld->mask);
   2741 }
   2742 
   2743 
   2744 /**
   2745  * Emit code which will dump the value of all the temporary registers
   2746  * to stdout.
   2747  */
   2748 static void
   2749 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
   2750                unsigned file)
   2751 {
   2752    const struct tgsi_shader_info *info = bld->bld_base.info;
   2753    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   2754    LLVMBuilderRef builder = gallivm->builder;
   2755    LLVMValueRef reg_ptr;
   2756    int index;
   2757    int max_index = info->file_max[file];
   2758 
   2759    /*
   2760     * Some register files, particularly constants, can be very large,
   2761     * and dumping everything could make this unusably slow.
   2762     */
   2763    max_index = MIN2(max_index, 32);
   2764 
   2765    for (index = 0; index <= max_index; index++) {
   2766       LLVMValueRef res;
   2767       unsigned mask;
   2768       int chan;
   2769 
   2770       if (index < 8 * sizeof(unsigned) &&
   2771           (info->file_mask[file] & (1u << index)) == 0)  {
   2772          /* This was not declared.*/
   2773          continue;
   2774       }
   2775 
   2776       if (file == TGSI_FILE_INPUT) {
   2777          mask = info->input_usage_mask[index];
   2778       } else {
   2779          mask = TGSI_WRITEMASK_XYZW;
   2780       }
   2781 
   2782       for (chan = 0; chan < 4; chan++) {
   2783          if ((mask & (1 << chan)) == 0) {
   2784             /* This channel is not used.*/
   2785             continue;
   2786          }
   2787 
   2788          if (file == TGSI_FILE_CONSTANT) {
   2789             struct tgsi_full_src_register reg;
   2790             memset(&reg, 0, sizeof reg);
   2791             reg.Register.File = file;
   2792             reg.Register.Index = index;
   2793             reg.Register.SwizzleX = 0;
   2794             reg.Register.SwizzleY = 1;
   2795             reg.Register.SwizzleZ = 2;
   2796             reg.Register.SwizzleW = 3;
   2797 
   2798             res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
   2799             if (!res) {
   2800                continue;
   2801             }
   2802          } else if (file == TGSI_FILE_INPUT) {
   2803             res = bld->inputs[index][chan];
   2804             if (!res) {
   2805                continue;
   2806             }
   2807          } else if (file == TGSI_FILE_TEMPORARY) {
   2808             reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
   2809             assert(reg_ptr);
   2810             res = LLVMBuildLoad(builder, reg_ptr, "");
   2811          } else if (file == TGSI_FILE_OUTPUT) {
   2812             reg_ptr = lp_get_output_ptr(bld, index, chan);
   2813             assert(reg_ptr);
   2814             res = LLVMBuildLoad(builder, reg_ptr, "");
   2815          } else {
   2816             assert(0);
   2817             continue;
   2818          }
   2819 
   2820          emit_dump_reg(gallivm, file, index, chan, res);
   2821       }
   2822    }
   2823 }
   2824 
   2825 
   2826 
/**
 * Process a TGSI declaration for SoA code generation.
 *
 * Allocates storage (allocas) for temporaries, outputs and address
 * registers that are not accessed indirectly, records sampler view
 * declarations, and fetches per-buffer constant pointers.
 *
 * \param bld_base  TGSI->LLVM translation context
 * \param decl      the declaration to process
 */
void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_declaration *decl)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned idx, i;

   assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);

   switch (decl->Declaration.File) {
   case TGSI_FILE_TEMPORARY:
      /* Indirectly-addressed temps use a single big array instead of
       * per-register allocas (handled elsewhere).
       */
      if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
         assert(last < LP_MAX_INLINED_TEMPS);
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
         }
      }
      break;

   case TGSI_FILE_OUTPUT:
      if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->outputs[idx][i] = lp_build_alloca(gallivm,
                                                      vec_type, "output");
         }
      }
      break;

   case TGSI_FILE_ADDRESS:
      /* ADDR registers are only allocated with an integer LLVM IR type,
       * as they are guaranteed to always have integers.
       * XXX: Not sure if this exception is worthwhile (or the whole idea of
       * an ADDR register for that matter).
       */
      assert(last < LP_MAX_TGSI_ADDRS);
      for (idx = first; idx <= last; ++idx) {
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
      }
      break;

   case TGSI_FILE_SAMPLER_VIEW:
      /*
       * The target stored here MUST match whatever there actually
       * is in the set sampler views (what about return type?).
       */
      assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
      for (idx = first; idx <= last; ++idx) {
         bld->sv[idx] = decl->SamplerView;
      }
      break;

   case TGSI_FILE_CONSTANT:
   {
      /*
       * We could trivially fetch the per-buffer pointer when fetching the
       * constant, relying on llvm to figure out it's always the same pointer
       * anyway. However, doing so results in a huge (more than factor of 10)
       * slowdown in llvm compilation times for some (but not all) shaders
       * (more specifically, the IR optimization spends way more time in
       * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
       */
      unsigned idx2D = decl->Dim.Index2D;
      LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
      assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
      bld->consts[idx2D] =
         lp_build_array_get(gallivm, bld->consts_ptr, index2D);
      bld->consts_sizes[idx2D] =
         lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
   }
      break;

   default:
      /* don't need to declare other vars */
      break;
   }
}
   2912 
   2913 
   2914 void lp_emit_immediate_soa(
   2915    struct lp_build_tgsi_context *bld_base,
   2916    const struct tgsi_full_immediate *imm)
   2917 {
   2918    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   2919    struct gallivm_state * gallivm = bld_base->base.gallivm;
   2920    LLVMValueRef imms[4];
   2921    unsigned i;
   2922    const uint size = imm->Immediate.NrTokens - 1;
   2923    assert(size <= 4);
   2924    switch (imm->Immediate.DataType) {
   2925    case TGSI_IMM_FLOAT32:
   2926       for( i = 0; i < size; ++i )
   2927          imms[i] =
   2928                lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
   2929 
   2930       break;
   2931    case TGSI_IMM_FLOAT64:
   2932    case TGSI_IMM_UINT64:
   2933    case TGSI_IMM_INT64:
   2934    case TGSI_IMM_UINT32:
   2935       for( i = 0; i < size; ++i ) {
   2936          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
   2937          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
   2938       }
   2939 
   2940       break;
   2941    case TGSI_IMM_INT32:
   2942       for( i = 0; i < size; ++i ) {
   2943          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
   2944          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
   2945       }
   2946 
   2947       break;
   2948    }
   2949    for( i = size; i < 4; ++i )
   2950       imms[i] = bld_base->base.undef;
   2951 
   2952    if (bld->use_immediates_array) {
   2953       unsigned index = bld->num_immediates;
   2954       struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   2955       LLVMBuilderRef builder = gallivm->builder;
   2956 
   2957       assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
   2958       for (i = 0; i < 4; ++i ) {
   2959          LLVMValueRef lindex = lp_build_const_int32(
   2960                   bld->bld_base.base.gallivm, index * 4 + i);
   2961          LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
   2962                                              bld->imms_array, &lindex, 1, "");
   2963          LLVMBuildStore(builder, imms[i], imm_ptr);
   2964       }
   2965    } else {
   2966       /* simply copy the immediate values into the next immediates[] slot */
   2967       unsigned i;
   2968       assert(imm->Immediate.NrTokens - 1 <= 4);
   2969       assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
   2970 
   2971       for(i = 0; i < 4; ++i )
   2972          bld->immediates[bld->num_immediates][i] = imms[i];
   2973 
   2974       if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
   2975          unsigned index = bld->num_immediates;
   2976          struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   2977          LLVMBuilderRef builder = gallivm->builder;
   2978          for (i = 0; i < 4; ++i ) {
   2979             LLVMValueRef lindex = lp_build_const_int32(
   2980                      bld->bld_base.base.gallivm, index * 4 + i);
   2981             LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
   2982                                                 bld->imms_array, &lindex, 1, "");
   2983             LLVMBuildStore(builder,
   2984                            bld->immediates[index][i],
   2985                            imm_ptr);
   2986          }
   2987       }
   2988    }
   2989 
   2990    bld->num_immediates++;
   2991 }
   2992 
   2993 static void
   2994 ddx_emit(
   2995    const struct lp_build_tgsi_action * action,
   2996    struct lp_build_tgsi_context * bld_base,
   2997    struct lp_build_emit_data * emit_data)
   2998 {
   2999    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3000 
   3001    emit_fetch_deriv(bld, emit_data->args[0], NULL,
   3002                     &emit_data->output[emit_data->chan], NULL);
   3003 }
   3004 
   3005 static void
   3006 ddy_emit(
   3007    const struct lp_build_tgsi_action * action,
   3008    struct lp_build_tgsi_context * bld_base,
   3009    struct lp_build_emit_data * emit_data)
   3010 {
   3011    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3012 
   3013    emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
   3014                     &emit_data->output[emit_data->chan]);
   3015 }
   3016 
   3017 static void
   3018 kill_emit(
   3019    const struct lp_build_tgsi_action * action,
   3020    struct lp_build_tgsi_context * bld_base,
   3021    struct lp_build_emit_data * emit_data)
   3022 {
   3023    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3024 
   3025    emit_kill(bld, bld_base->pc - 1);
   3026 }
   3027 
   3028 static void
   3029 kill_if_emit(
   3030    const struct lp_build_tgsi_action * action,
   3031    struct lp_build_tgsi_context * bld_base,
   3032    struct lp_build_emit_data * emit_data)
   3033 {
   3034    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3035 
   3036    emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
   3037 }
   3038 
   3039 static void
   3040 tex_emit(
   3041    const struct lp_build_tgsi_action * action,
   3042    struct lp_build_tgsi_context * bld_base,
   3043    struct lp_build_emit_data * emit_data)
   3044 {
   3045    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3046 
   3047    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
   3048             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
   3049 }
   3050 
   3051 static void
   3052 tex2_emit(
   3053    const struct lp_build_tgsi_action * action,
   3054    struct lp_build_tgsi_context * bld_base,
   3055    struct lp_build_emit_data * emit_data)
   3056 {
   3057    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3058 
   3059    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
   3060             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
   3061 }
   3062 
   3063 static void
   3064 txb_emit(
   3065    const struct lp_build_tgsi_action * action,
   3066    struct lp_build_tgsi_context * bld_base,
   3067    struct lp_build_emit_data * emit_data)
   3068 {
   3069    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3070 
   3071    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
   3072             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
   3073 }
   3074 
   3075 static void
   3076 txb2_emit(
   3077    const struct lp_build_tgsi_action * action,
   3078    struct lp_build_tgsi_context * bld_base,
   3079    struct lp_build_emit_data * emit_data)
   3080 {
   3081    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3082 
   3083    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
   3084             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
   3085 }
   3086 
   3087 static void
   3088 txd_emit(
   3089    const struct lp_build_tgsi_action * action,
   3090    struct lp_build_tgsi_context * bld_base,
   3091    struct lp_build_emit_data * emit_data)
   3092 {
   3093    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3094 
   3095    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
   3096             emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
   3097 }
   3098 
   3099 static void
   3100 txl_emit(
   3101    const struct lp_build_tgsi_action * action,
   3102    struct lp_build_tgsi_context * bld_base,
   3103    struct lp_build_emit_data * emit_data)
   3104 {
   3105    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3106 
   3107    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
   3108             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
   3109 }
   3110 
   3111 static void
   3112 txl2_emit(
   3113    const struct lp_build_tgsi_action * action,
   3114    struct lp_build_tgsi_context * bld_base,
   3115    struct lp_build_emit_data * emit_data)
   3116 {
   3117    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3118 
   3119    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
   3120             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
   3121 }
   3122 
   3123 static void
   3124 txp_emit(
   3125    const struct lp_build_tgsi_action * action,
   3126    struct lp_build_tgsi_context * bld_base,
   3127    struct lp_build_emit_data * emit_data)
   3128 {
   3129    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3130 
   3131    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
   3132             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
   3133 }
   3134 
   3135 static void
   3136 tg4_emit(
   3137    const struct lp_build_tgsi_action * action,
   3138    struct lp_build_tgsi_context * bld_base,
   3139    struct lp_build_emit_data * emit_data)
   3140 {
   3141    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3142 
   3143    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
   3144             emit_data->output, 2, LP_SAMPLER_OP_GATHER);
   3145 }
   3146 
   3147 static void
   3148 lodq_emit(
   3149    const struct lp_build_tgsi_action * action,
   3150    struct lp_build_tgsi_context * bld_base,
   3151    struct lp_build_emit_data * emit_data)
   3152 {
   3153    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3154 
   3155    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
   3156             emit_data->output, 1, LP_SAMPLER_OP_LODQ);
   3157 }
   3158 
   3159 static void
   3160 txq_emit(
   3161    const struct lp_build_tgsi_action * action,
   3162    struct lp_build_tgsi_context * bld_base,
   3163    struct lp_build_emit_data * emit_data)
   3164 {
   3165    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3166 
   3167    emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
   3168 }
   3169 
   3170 static void
   3171 txf_emit(
   3172    const struct lp_build_tgsi_action * action,
   3173    struct lp_build_tgsi_context * bld_base,
   3174    struct lp_build_emit_data * emit_data)
   3175 {
   3176    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3177 
   3178    emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
   3179 }
   3180 
   3181 static void
   3182 sample_i_emit(
   3183    const struct lp_build_tgsi_action * action,
   3184    struct lp_build_tgsi_context * bld_base,
   3185    struct lp_build_emit_data * emit_data)
   3186 {
   3187    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3188 
   3189    emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
   3190 }
   3191 
   3192 static void
   3193 sample_emit(
   3194    const struct lp_build_tgsi_action * action,
   3195    struct lp_build_tgsi_context * bld_base,
   3196    struct lp_build_emit_data * emit_data)
   3197 {
   3198    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3199 
   3200    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
   3201                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
   3202 }
   3203 
   3204 static void
   3205 sample_b_emit(
   3206    const struct lp_build_tgsi_action * action,
   3207    struct lp_build_tgsi_context * bld_base,
   3208    struct lp_build_emit_data * emit_data)
   3209 {
   3210    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3211 
   3212    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
   3213                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
   3214 }
   3215 
   3216 static void
   3217 sample_c_emit(
   3218    const struct lp_build_tgsi_action * action,
   3219    struct lp_build_tgsi_context * bld_base,
   3220    struct lp_build_emit_data * emit_data)
   3221 {
   3222    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3223 
   3224    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
   3225                TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
   3226 }
   3227 
   3228 static void
   3229 sample_c_lz_emit(
   3230    const struct lp_build_tgsi_action * action,
   3231    struct lp_build_tgsi_context * bld_base,
   3232    struct lp_build_emit_data * emit_data)
   3233 {
   3234    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3235 
   3236    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
   3237                TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
   3238 }
   3239 
   3240 static void
   3241 sample_d_emit(
   3242    const struct lp_build_tgsi_action * action,
   3243    struct lp_build_tgsi_context * bld_base,
   3244    struct lp_build_emit_data * emit_data)
   3245 {
   3246    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3247 
   3248    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
   3249                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
   3250 }
   3251 
   3252 static void
   3253 sample_l_emit(
   3254    const struct lp_build_tgsi_action * action,
   3255    struct lp_build_tgsi_context * bld_base,
   3256    struct lp_build_emit_data * emit_data)
   3257 {
   3258    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3259 
   3260    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
   3261                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
   3262 }
   3263 
   3264 static void
   3265 gather4_emit(
   3266    const struct lp_build_tgsi_action * action,
   3267    struct lp_build_tgsi_context * bld_base,
   3268    struct lp_build_emit_data * emit_data)
   3269 {
   3270    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3271 
   3272    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
   3273                FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
   3274 }
   3275 
   3276 static void
   3277 sviewinfo_emit(
   3278    const struct lp_build_tgsi_action * action,
   3279    struct lp_build_tgsi_context * bld_base,
   3280    struct lp_build_emit_data * emit_data)
   3281 {
   3282    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3283 
   3284    emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
   3285 }
   3286 
   3287 static void
   3288 lod_emit(
   3289    const struct lp_build_tgsi_action * action,
   3290    struct lp_build_tgsi_context * bld_base,
   3291    struct lp_build_emit_data * emit_data)
   3292 {
   3293    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3294 
   3295    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
   3296                FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
   3297 }
   3298 
   3299 static LLVMValueRef
   3300 mask_vec(struct lp_build_tgsi_context *bld_base)
   3301 {
   3302    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3303    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   3304    struct lp_exec_mask *exec_mask = &bld->exec_mask;
   3305 
   3306    if (!exec_mask->has_mask) {
   3307       return lp_build_mask_value(bld->mask);
   3308    }
   3309    return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
   3310                        exec_mask->exec_mask, "");
   3311 }
   3312 
   3313 static void
   3314 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
   3315                           LLVMValueRef ptr,
   3316                           LLVMValueRef mask)
   3317 {
   3318    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   3319    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
   3320 
   3321    current_vec = LLVMBuildSub(builder, current_vec, mask, "");
   3322 
   3323    LLVMBuildStore(builder, current_vec, ptr);
   3324 }
   3325 
   3326 static void
   3327 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
   3328                              LLVMValueRef ptr,
   3329                              LLVMValueRef mask)
   3330 {
   3331    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   3332    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
   3333 
   3334    current_vec = lp_build_select(&bld_base->uint_bld,
   3335                                  mask,
   3336                                  bld_base->uint_bld.zero,
   3337                                  current_vec);
   3338 
   3339    LLVMBuildStore(builder, current_vec, ptr);
   3340 }
   3341 
   3342 static LLVMValueRef
   3343 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
   3344                                   LLVMValueRef current_mask_vec,
   3345                                   LLVMValueRef total_emitted_vertices_vec)
   3346 {
   3347    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   3348    struct lp_build_context *int_bld = &bld->bld_base.int_bld;
   3349    LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
   3350                                         total_emitted_vertices_vec,
   3351                                         bld->max_output_vertices_vec);
   3352 
   3353    return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
   3354 }
   3355 
/**
 * EMIT opcode (geometry shaders): record one vertex per active lane.
 *
 * Gathers the current output registers and hands them to the GS
 * interface, then bumps both per-primitive and total vertex counters
 * for the lanes still active (the mask is first clamped so lanes at
 * the max output vertex count stop counting).  No-op if the GS
 * interface provides no emit_vertex hook.
 */
static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      /* Current exec mask, then drop lanes already at the vertex limit. */
      LLVMValueRef mask = mask_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      mask = clamp_mask_to_max_output_vertices(bld, mask,
                                               total_emitted_vertices_vec);
      /* Flush output registers before handing them to the interface. */
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                 bld->outputs,
                                 total_emitted_vertices_vec);
      /* Count this vertex in both the per-primitive and total counters. */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                mask);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}
   3389 
   3390 
/**
 * Finish the current geometry-shader primitive for the lanes in 'mask'.
 *
 * Called both from the ENDPRIM opcode handler (end_primitive) and from
 * the epilogue for the implicit flush.  The counter updates must happen
 * after the interface callback: increment the emitted-primitives
 * counter, then reset the per-primitive vertex counter to zero for the
 * affected lanes.  No-op if the GS interface provides no end_primitive
 * hook.
 */
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      /* Lanes with at least one unflushed vertex in this primitive. */
      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* We need to combine the current execution mask with the mask
         telling us which, if any, execution slots actually have
         unemitted primitives, this way we make sure that end_primitives
         executes only on the paths that have unflushed vertices */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
                                   emitted_vertices_vec,
                                   emitted_prims_vec);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      /* One more primitive finished; reset its vertex count. */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}
   3443 
   3444 static void
   3445 end_primitive(
   3446    const struct lp_build_tgsi_action * action,
   3447    struct lp_build_tgsi_context * bld_base,
   3448    struct lp_build_emit_data * emit_data)
   3449 {
   3450    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3451 
   3452    if (bld->gs_iface->end_primitive) {
   3453       LLVMValueRef mask = mask_vec(bld_base);
   3454       end_primitive_masked(bld_base, mask);
   3455    }
   3456 }
   3457 
   3458 static void
   3459 cal_emit(
   3460    const struct lp_build_tgsi_action * action,
   3461    struct lp_build_tgsi_context * bld_base,
   3462    struct lp_build_emit_data * emit_data)
   3463 {
   3464    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3465 
   3466    lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
   3467                      &bld_base->pc);
   3468 }
   3469 
   3470 static void
   3471 ret_emit(
   3472    const struct lp_build_tgsi_action * action,
   3473    struct lp_build_tgsi_context * bld_base,
   3474    struct lp_build_emit_data * emit_data)
   3475 {
   3476    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3477 
   3478    lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
   3479 }
   3480 
   3481 static void
   3482 brk_emit(
   3483    const struct lp_build_tgsi_action * action,
   3484    struct lp_build_tgsi_context * bld_base,
   3485    struct lp_build_emit_data * emit_data)
   3486 {
   3487    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3488 
   3489    lp_exec_break(&bld->exec_mask, bld_base);
   3490 }
   3491 
   3492 static void
   3493 if_emit(
   3494    const struct lp_build_tgsi_action * action,
   3495    struct lp_build_tgsi_context * bld_base,
   3496    struct lp_build_emit_data * emit_data)
   3497 {
   3498    LLVMValueRef tmp;
   3499    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3500 
   3501    tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
   3502                       emit_data->args[0], bld->bld_base.base.zero);
   3503    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
   3504 }
   3505 
   3506 static void
   3507 uif_emit(
   3508    const struct lp_build_tgsi_action * action,
   3509    struct lp_build_tgsi_context * bld_base,
   3510    struct lp_build_emit_data * emit_data)
   3511 {
   3512    LLVMValueRef tmp;
   3513    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3514    struct lp_build_context *uint_bld = &bld_base->uint_bld;
   3515 
   3516    tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
   3517                       emit_data->args[0], uint_bld->zero);
   3518    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
   3519 }
   3520 
   3521 static void
   3522 case_emit(
   3523    const struct lp_build_tgsi_action * action,
   3524    struct lp_build_tgsi_context * bld_base,
   3525    struct lp_build_emit_data * emit_data)
   3526 {
   3527    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3528 
   3529    lp_exec_case(&bld->exec_mask, emit_data->args[0]);
   3530 }
   3531 
   3532 static void
   3533 default_emit(
   3534    const struct lp_build_tgsi_action * action,
   3535    struct lp_build_tgsi_context * bld_base,
   3536    struct lp_build_emit_data * emit_data)
   3537 {
   3538    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3539 
   3540    lp_exec_default(&bld->exec_mask, bld_base);
   3541 }
   3542 
   3543 static void
   3544 switch_emit(
   3545    const struct lp_build_tgsi_action * action,
   3546    struct lp_build_tgsi_context * bld_base,
   3547    struct lp_build_emit_data * emit_data)
   3548 {
   3549    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3550 
   3551    lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
   3552 }
   3553 
   3554 static void
   3555 endswitch_emit(
   3556    const struct lp_build_tgsi_action * action,
   3557    struct lp_build_tgsi_context * bld_base,
   3558    struct lp_build_emit_data * emit_data)
   3559 {
   3560    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3561 
   3562    lp_exec_endswitch(&bld->exec_mask, bld_base);
   3563 }
   3564 
   3565 static void
   3566 bgnloop_emit(
   3567    const struct lp_build_tgsi_action * action,
   3568    struct lp_build_tgsi_context * bld_base,
   3569    struct lp_build_emit_data * emit_data)
   3570 {
   3571    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3572 
   3573    lp_exec_bgnloop(&bld->exec_mask);
   3574 }
   3575 
   3576 static void
   3577 bgnsub_emit(
   3578    const struct lp_build_tgsi_action * action,
   3579    struct lp_build_tgsi_context * bld_base,
   3580    struct lp_build_emit_data * emit_data)
   3581 {
   3582    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3583 
   3584    lp_exec_mask_bgnsub(&bld->exec_mask);
   3585 }
   3586 
   3587 static void
   3588 else_emit(
   3589    const struct lp_build_tgsi_action * action,
   3590    struct lp_build_tgsi_context * bld_base,
   3591    struct lp_build_emit_data * emit_data)
   3592 {
   3593    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3594 
   3595    lp_exec_mask_cond_invert(&bld->exec_mask);
   3596 }
   3597 
   3598 static void
   3599 endif_emit(
   3600    const struct lp_build_tgsi_action * action,
   3601    struct lp_build_tgsi_context * bld_base,
   3602    struct lp_build_emit_data * emit_data)
   3603 {
   3604    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3605 
   3606    lp_exec_mask_cond_pop(&bld->exec_mask);
   3607 }
   3608 
   3609 static void
   3610 endloop_emit(
   3611    const struct lp_build_tgsi_action * action,
   3612    struct lp_build_tgsi_context * bld_base,
   3613    struct lp_build_emit_data * emit_data)
   3614 {
   3615    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3616 
   3617    lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
   3618 }
   3619 
   3620 static void
   3621 endsub_emit(
   3622    const struct lp_build_tgsi_action * action,
   3623    struct lp_build_tgsi_context * bld_base,
   3624    struct lp_build_emit_data * emit_data)
   3625 {
   3626    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3627 
   3628    lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
   3629 }
   3630 
   3631 static void
   3632 cont_emit(
   3633    const struct lp_build_tgsi_action * action,
   3634    struct lp_build_tgsi_context * bld_base,
   3635    struct lp_build_emit_data * emit_data)
   3636 {
   3637    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3638 
   3639    lp_exec_continue(&bld->exec_mask);
   3640 }
   3641 
/**
 * Shader prologue: allocate backing storage for indirectly addressed
 * register files (temps, outputs, immediates, inputs), set up the
 * geometry-shader vertex/primitive counters, and optionally dump
 * inputs for debugging.
 *
 * Array sizes are (file_max + 1) registers * 4 channels; file_max is
 * the highest register index used, hence the "* 4 + 4".
 */
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                         bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
      bld->temps_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "temp_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                            bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                bld_base->base.vec_type, array_size,
                                                "output_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                         bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
      bld->imms_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                               vec_type, array_size,
                                               "input_array");

      assert(bld_base->info->num_inputs
                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      /* Copy each declared input channel into the alloca'd array. */
      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            /* Some channels may be absent (NULL) — only store real ones. */
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   if (bld->gs_iface) {
      /* Per-lane counters used by EMIT/ENDPRIM, all zero-initialized. */
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}
   3732 
/**
 * Shader epilogue.  For geometry shaders: flush any primitive still
 * being built and report final vertex/primitive counts through the GS
 * interface.  For other shader types: copy outputs back to the
 * caller-visible slots.  Optionally dumps outputs for debugging.
 */
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   /* If we have indirect addressing in outputs we need to copy our alloca array
    * to the outputs slots specified by the caller */
   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* implicit end_primitives, needed in case there are any unflushed
         vertices in the cache. Note must not call end_primitive here
         since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      /* Read final counters after the implicit flush above updated them. */
      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      gather_outputs(bld);
   }
}
   3770 
   3771 void
   3772 lp_build_tgsi_soa(struct gallivm_state *gallivm,
   3773                   const struct tgsi_token *tokens,
   3774                   struct lp_type type,
   3775                   struct lp_build_mask_context *mask,
   3776                   LLVMValueRef consts_ptr,
   3777                   LLVMValueRef const_sizes_ptr,
   3778                   const struct lp_bld_tgsi_system_values *system_values,
   3779                   const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
   3780                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
   3781                   LLVMValueRef context_ptr,
   3782                   LLVMValueRef thread_data_ptr,
   3783                   struct lp_build_sampler_soa *sampler,
   3784                   const struct tgsi_shader_info *info,
   3785                   const struct lp_build_tgsi_gs_iface *gs_iface)
   3786 {
   3787    struct lp_build_tgsi_soa_context bld;
   3788 
   3789    struct lp_type res_type;
   3790 
   3791    assert(type.length <= LP_MAX_VECTOR_LENGTH);
   3792    memset(&res_type, 0, sizeof res_type);
   3793    res_type.width = type.width;
   3794    res_type.length = type.length;
   3795    res_type.sign = 1;
   3796 
   3797    /* Setup build context */
   3798    memset(&bld, 0, sizeof bld);
   3799    lp_build_context_init(&bld.bld_base.base, gallivm, type);
   3800    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   3801    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   3802    lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   3803    {
   3804       struct lp_type dbl_type;
   3805       dbl_type = type;
   3806       dbl_type.width *= 2;
   3807       lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   3808    }
   3809    {
   3810       struct lp_type uint64_type;
   3811       uint64_type = lp_uint_type(type);
   3812       uint64_type.width *= 2;
   3813       lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   3814    }
   3815    {
   3816       struct lp_type int64_type;
   3817       int64_type = lp_int_type(type);
   3818       int64_type.width *= 2;
   3819       lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   3820    }
   3821    bld.mask = mask;
   3822    bld.inputs = inputs;
   3823    bld.outputs = outputs;
   3824    bld.consts_ptr = consts_ptr;
   3825    bld.const_sizes_ptr = const_sizes_ptr;
   3826    bld.sampler = sampler;
   3827    bld.bld_base.info = info;
   3828    bld.indirect_files = info->indirect_files;
   3829    bld.context_ptr = context_ptr;
   3830    bld.thread_data_ptr = thread_data_ptr;
   3831 
   3832    /*
   3833     * If the number of temporaries is rather large then we just
   3834     * allocate them as an array right from the start and treat
   3835     * like indirect temporaries.
   3836     */
   3837    if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
   3838       bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   3839    }
    3840    /*
    3841     * For performance reasons, immediates are always backed by a static
    3842     * array, but if their number is too great, we have to use just
    3843     * a dynamically allocated array.
    3844     */
   3845    bld.use_immediates_array =
   3846          (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   3847    if (bld.use_immediates_array) {
   3848       bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   3849    }
   3850 
   3851 
   3852    bld.bld_base.soa = TRUE;
   3853    bld.bld_base.emit_debug = emit_debug;
   3854    bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   3855    bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   3856    bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   3857    bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   3858    bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   3859    bld.bld_base.emit_store = emit_store;
   3860 
   3861    bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   3862    bld.bld_base.emit_immediate = lp_emit_immediate_soa;
   3863 
   3864    bld.bld_base.emit_prologue = emit_prologue;
   3865    bld.bld_base.emit_epilogue = emit_epilogue;
   3866 
   3867    /* Set opcode actions */
   3868    lp_set_default_actions_cpu(&bld.bld_base);
   3869 
   3870    bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   3871    bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   3872    bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   3873    bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   3874    bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   3875    bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   3876    bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   3877    bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   3878    bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   3879    bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   3880    bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   3881    bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   3882    bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   3883    bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   3884    bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   3885    bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   3886    bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   3887    bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   3888    bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   3889    bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   3890    bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   3891    bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   3892    bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   3893    bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   3894    bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   3895    bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   3896    bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   3897    bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   3898    bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   3899    bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   3900    bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   3901    bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
   3902    /* DX10 sampling ops */
   3903    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   3904    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   3905    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   3906    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   3907    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   3908    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   3909    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   3910    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   3911    bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
   3912    bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
   3913    bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
   3914 
   3915 
   3916    if (gs_iface) {
    3917       /* There's no specific value for this because it should always
    3918        * be set, but apps using ext_geometry_shader4 quite often
    3919        * forgot to set it, so we're using MAX_VERTEX_VARYING from
    3920        * that spec even though we could debug_assert if it's not
    3921        * set, but that's a lot uglier. */
   3922       uint max_output_vertices;
   3923 
   3924       /* inputs are always indirect with gs */
   3925       bld.indirect_files |= (1 << TGSI_FILE_INPUT);
   3926       bld.gs_iface = gs_iface;
   3927       bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
   3928       bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
   3929       bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
   3930 
   3931       max_output_vertices =
   3932             info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
   3933       if (!max_output_vertices)
   3934          max_output_vertices = 32;
   3935 
   3936       bld.max_output_vertices_vec =
   3937          lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
   3938                                 max_output_vertices);
   3939    }
   3940 
   3941    lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
   3942 
   3943    bld.system_values = *system_values;
   3944 
   3945    lp_build_tgsi_llvm(&bld.bld_base, tokens);
   3946 
   3947    if (0) {
   3948       LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
   3949       LLVMValueRef function = LLVMGetBasicBlockParent(block);
   3950       debug_printf("11111111111111111111111111111 \n");
   3951       tgsi_dump(tokens, 0);
   3952       lp_debug_dump_value(function);
   3953       debug_printf("2222222222222222222222222222 \n");
   3954    }
   3955 
   3956    if (0) {
   3957       LLVMModuleRef module = LLVMGetGlobalParent(
   3958          LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
   3959       LLVMDumpModule(module);
   3960 
   3961    }
   3962    lp_exec_mask_fini(&bld.exec_mask);
   3963 }
   3964