Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2009 VMware, Inc.
      4  * Copyright 2007-2008 VMware, Inc.
      5  * All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sub license, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * The above copyright notice and this permission notice (including the
     16  * next paragraph) shall be included in all copies or substantial portions
     17  * of the Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     26  *
     27  **************************************************************************/
     28 
     29 /**
     30  * @file
     31  * TGSI to LLVM IR translation -- SoA.
     32  *
     33  * @author Jose Fonseca <jfonseca (at) vmware.com>
     34  *
     35  * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
     36  * Brian Paul, and others.
     37  */
     38 
     39 #include "pipe/p_config.h"
     40 #include "pipe/p_shader_tokens.h"
     41 #include "util/u_debug.h"
     42 #include "util/u_math.h"
     43 #include "util/u_memory.h"
     44 #include "tgsi/tgsi_dump.h"
     45 #include "tgsi/tgsi_exec.h"
     46 #include "tgsi/tgsi_info.h"
     47 #include "tgsi/tgsi_parse.h"
     48 #include "tgsi/tgsi_util.h"
     49 #include "tgsi/tgsi_scan.h"
     50 #include "tgsi/tgsi_strings.h"
     51 #include "lp_bld_tgsi_action.h"
     52 #include "lp_bld_type.h"
     53 #include "lp_bld_const.h"
     54 #include "lp_bld_arit.h"
     55 #include "lp_bld_bitarit.h"
     56 #include "lp_bld_gather.h"
     57 #include "lp_bld_init.h"
     58 #include "lp_bld_logic.h"
     59 #include "lp_bld_swizzle.h"
     60 #include "lp_bld_flow.h"
     61 #include "lp_bld_quad.h"
     62 #include "lp_bld_tgsi.h"
     63 #include "lp_bld_limits.h"
     64 #include "lp_bld_debug.h"
     65 #include "lp_bld_printf.h"
     66 #include "lp_bld_sample.h"
     67 #include "lp_bld_struct.h"
     68 
     69 /* SM 4.0 says that subroutines can nest 32 deep and
     70  * we need one more for our main function */
     71 #define LP_MAX_NUM_FUNCS 33
     72 
     73 #define DUMP_GS_EMITS 0
     74 
     75 /*
     76  * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
     77  * instruction.
     78  *
     79  * TODO:
     80  * - take execution masks in consideration
     81  * - debug control-flow instructions
     82  */
     83 #define DEBUG_EXECUTION 0
     84 
     85 
     86 /*
     87  * Emit code to print a register value.
     88  */
     89 static void
     90 emit_dump_reg(struct gallivm_state *gallivm,
     91               unsigned file,
     92               unsigned index,
     93               unsigned chan,
     94               LLVMValueRef value)
     95 {
     96    char buf[32];
     97 
     98    util_snprintf(buf, sizeof buf, "    %s[%u].%c = ",
     99                  tgsi_file_name(file),
    100                  index, "xyzw"[chan]);
    101 
    102    lp_build_print_value(gallivm, buf, value);
    103 }
    104 
    105 /*
    106  * Return the context for the current function.
    107  * (always 'main', if shader doesn't do any function calls)
    108  */
    109 static inline struct function_ctx *
    110 func_ctx(struct lp_exec_mask *mask)
    111 {
    112    assert(mask->function_stack_size > 0);
    113    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
    114    return &mask->function_stack[mask->function_stack_size - 1];
    115 }
    116 
    117 /*
    118  * Returns true if we're in a loop.
    119  * It's global, meaning that it returns true even if there's
    120  * no loop inside the current function, but we were inside
    121  * a loop inside another function, from which this one was called.
    122  */
    123 static inline boolean
    124 mask_has_loop(struct lp_exec_mask *mask)
    125 {
    126    int i;
    127    for (i = mask->function_stack_size - 1; i >= 0; --i) {
    128       const struct function_ctx *ctx = &mask->function_stack[i];
    129       if (ctx->loop_stack_size > 0)
    130          return TRUE;
    131    }
    132    return FALSE;
    133 }
    134 
    135 /*
    136  * Returns true if we're inside a switch statement.
    137  * It's global, meaning that it returns true even if there's
    138  * no switch in the current function, but we were inside
    139  * a switch inside another function, from which this one was called.
    140  */
    141 static inline boolean
    142 mask_has_switch(struct lp_exec_mask *mask)
    143 {
    144    int i;
    145    for (i = mask->function_stack_size - 1; i >= 0; --i) {
    146       const struct function_ctx *ctx = &mask->function_stack[i];
    147       if (ctx->switch_stack_size > 0)
    148          return TRUE;
    149    }
    150    return FALSE;
    151 }
    152 
    153 /*
    154  * Returns true if we're inside a conditional.
    155  * It's global, meaning that it returns true even if there's
    156  * no conditional in the current function, but we were inside
    157  * a conditional inside another function, from which this one was called.
    158  */
    159 static inline boolean
    160 mask_has_cond(struct lp_exec_mask *mask)
    161 {
    162    int i;
    163    for (i = mask->function_stack_size - 1; i >= 0; --i) {
    164       const struct function_ctx *ctx = &mask->function_stack[i];
    165       if (ctx->cond_stack_size > 0)
    166          return TRUE;
    167    }
    168    return FALSE;
    169 }
    170 
    171 
    172 /*
    173  * Initialize a function context at the specified index.
    174  */
    175 static void
    176 lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
    177 {
    178    LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
    179    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    180    struct function_ctx *ctx =  &mask->function_stack[function_idx];
    181 
    182    ctx->cond_stack_size = 0;
    183    ctx->loop_stack_size = 0;
    184    ctx->switch_stack_size = 0;
    185 
    186    if (function_idx == 0) {
    187       ctx->ret_mask = mask->ret_mask;
    188    }
    189 
    190    ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
    191                                        int_type, "looplimiter");
    192    LLVMBuildStore(
    193       builder,
    194       LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
    195       ctx->loop_limiter);
    196 }
    197 
    198 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
    199 {
    200    mask->bld = bld;
    201    mask->has_mask = FALSE;
    202    mask->ret_in_main = FALSE;
    203    /* For the main function */
    204    mask->function_stack_size = 1;
    205 
    206    mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
    207    mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
    208          mask->cond_mask = mask->switch_mask =
    209          LLVMConstAllOnes(mask->int_vec_type);
    210 
    211    mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
    212                                  sizeof(mask->function_stack[0]));
    213    lp_exec_mask_function_init(mask, 0);
    214 }
    215 
    216 static void
    217 lp_exec_mask_fini(struct lp_exec_mask *mask)
    218 {
    219    FREE(mask->function_stack);
    220 }
    221 
    222 static void lp_exec_mask_update(struct lp_exec_mask *mask)
    223 {
    224    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    225    boolean has_loop_mask = mask_has_loop(mask);
    226    boolean has_cond_mask = mask_has_cond(mask);
    227    boolean has_switch_mask = mask_has_switch(mask);
    228    boolean has_ret_mask = mask->function_stack_size > 1 ||
    229          mask->ret_in_main;
    230 
    231    if (has_loop_mask) {
    232       /*for loops we need to update the entire mask at runtime */
    233       LLVMValueRef tmp;
    234       assert(mask->break_mask);
    235       tmp = LLVMBuildAnd(builder,
    236                          mask->cont_mask,
    237                          mask->break_mask,
    238                          "maskcb");
    239       mask->exec_mask = LLVMBuildAnd(builder,
    240                                      mask->cond_mask,
    241                                      tmp,
    242                                      "maskfull");
    243    } else
    244       mask->exec_mask = mask->cond_mask;
    245 
    246    if (has_switch_mask) {
    247       mask->exec_mask = LLVMBuildAnd(builder,
    248                                      mask->exec_mask,
    249                                      mask->switch_mask,
    250                                      "switchmask");
    251    }
    252 
    253    if (has_ret_mask) {
    254       mask->exec_mask = LLVMBuildAnd(builder,
    255                                      mask->exec_mask,
    256                                      mask->ret_mask,
    257                                      "callmask");
    258    }
    259 
    260    mask->has_mask = (has_cond_mask ||
    261                      has_loop_mask ||
    262                      has_switch_mask ||
    263                      has_ret_mask);
    264 }
    265 
    266 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
    267                                    LLVMValueRef val)
    268 {
    269    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    270    struct function_ctx *ctx = func_ctx(mask);
    271 
    272    if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
    273       ctx->cond_stack_size++;
    274       return;
    275    }
    276    if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
    277       assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
    278    }
    279    ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
    280    assert(LLVMTypeOf(val) == mask->int_vec_type);
    281    mask->cond_mask = LLVMBuildAnd(builder,
    282                                   mask->cond_mask,
    283                                   val,
    284                                   "");
    285    lp_exec_mask_update(mask);
    286 }
    287 
    288 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
    289 {
    290    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    291    struct function_ctx *ctx = func_ctx(mask);
    292    LLVMValueRef prev_mask;
    293    LLVMValueRef inv_mask;
    294 
    295    assert(ctx->cond_stack_size);
    296    if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
    297       return;
    298    prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
    299    if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
    300       assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
    301    }
    302 
    303    inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
    304 
    305    mask->cond_mask = LLVMBuildAnd(builder,
    306                                   inv_mask,
    307                                   prev_mask, "");
    308    lp_exec_mask_update(mask);
    309 }
    310 
    311 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
    312 {
    313    struct function_ctx *ctx = func_ctx(mask);
    314    assert(ctx->cond_stack_size);
    315    --ctx->cond_stack_size;
    316    if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
    317       return;
    318    mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
    319    lp_exec_mask_update(mask);
    320 }
    321 
/*
 * Emit a TGSI BGNLOOP: save the enclosing loop's state on this function's
 * loop stack, start a new basic block for the loop body, and set up the
 * break mask. The break mask is kept in an alloca so it survives the
 * loop back-edge (see lp_exec_endloop).
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   /* Past the nesting limit: only track the depth, emit no IR. */
   if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
      ++ctx->loop_stack_size;
      return;
   }

   /* BRK binds to the innermost loop OR switch; remember what the
    * enclosing construct was so ENDLOOP can restore it. */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   /* Push the enclosing loop's state. */
   ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
   ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
   ++ctx->loop_stack_size;

   /* The break mask lives in memory so each iteration can reload the
    * value stored by the previous one. */
   ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, ctx->loop_block);
   LLVMPositionBuilderAtEnd(builder, ctx->loop_block);

   mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");

   lp_exec_mask_update(mask);
}
    354 
/*
 * Emit a TGSI BRK. Depending on ctx->break_type this leaves the innermost
 * loop (update break_mask) or the innermost switch (update switch_mask).
 * For switches, an unconditional break may also end the re-execution of a
 * deferred DEFAULT block (see lp_exec_default / lp_exec_endswitch).
 */
static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      /* All currently active lanes leave the loop. */
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      /* A break immediately followed by CASE/ENDSWITCH must have been
       * unconditional, hence affects all lanes. */
      unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);


      if (ctx->switch_in_default) {
         /*
          * stop default execution but only if this is an unconditional switch.
          * (The condition here is not perfect since dead code after break is
          * allowed but should be sufficient since false negatives are just
          * unoptimized - so we don't have to pre-evaluate that).
          */
         if(break_always && ctx->switch_pc) {
            /* Jump back to where lp_exec_endswitch() saved the pc; the
             * deferred default block is finished. */
            bld_base->pc = ctx->switch_pc;
            return;
         }
      }

      if (break_always) {
         /* Unconditional: every lane leaves the switch. */
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         /* Conditional: only the currently active lanes leave. */
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}
    404 
    405 static void lp_exec_break_condition(struct lp_exec_mask *mask,
    406                                     LLVMValueRef cond)
    407 {
    408    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    409    struct function_ctx *ctx = func_ctx(mask);
    410    LLVMValueRef cond_mask = LLVMBuildAnd(builder,
    411                                          mask->exec_mask,
    412                                          cond, "cond_mask");
    413    cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");
    414 
    415    if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
    416       mask->break_mask = LLVMBuildAnd(builder,
    417                                       mask->break_mask,
    418                                       cond_mask, "breakc_full");
    419    }
    420    else {
    421       mask->switch_mask = LLVMBuildAnd(builder,
    422                                        mask->switch_mask,
    423                                        cond_mask, "breakc_switch");
    424    }
    425 
    426    lp_exec_mask_update(mask);
    427 }
    428 
    429 static void lp_exec_continue(struct lp_exec_mask *mask)
    430 {
    431    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    432    LLVMValueRef exec_mask = LLVMBuildNot(builder,
    433                                          mask->exec_mask,
    434                                          "");
    435 
    436    mask->cont_mask = LLVMBuildAnd(builder,
    437                                   mask->cont_mask,
    438                                   exec_mask, "");
    439 
    440    lp_exec_mask_update(mask);
    441 }
    442 
    443 
/*
 * Emit a TGSI ENDLOOP: branch back to the top of the loop while any lane
 * is still live (and the iteration limiter has not expired), then pop the
 * enclosing loop's state off the loop stack.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   /* Integer wide enough to view the whole exec_mask vector as a single
    * scalar, for a one-instruction "any lane live" test. */
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);


   assert(ctx->loop_stack_size);
   /* Levels beyond the nesting limit were never really entered
    * (see lp_exec_bgnloop). */
   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      --ctx->loop_stack_size;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop
    */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, ctx->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   /* Loop back while any lane is live and the limiter hasn't run out. */
   LLVMBuildCondBr(builder,
                   icond, ctx->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   assert(ctx->loop_stack_size);
   /* Pop the enclosing loop's state (pushed by lp_exec_bgnloop). */
   --ctx->loop_stack_size;
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
         ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
    523 
/*
 * Emit a TGSI SWITCH: push the enclosing switch's state, then start with
 * an all-zero switch mask -- no lanes execute until a CASE matches.
 */
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   /* Past the nesting limit: only track the depth, emit nothing. */
   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size++;
      return;
   }

   /* BRK binds to the innermost loop OR switch; remember what the
    * enclosing construct was so ENDSWITCH can restore it. */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   /* Push the enclosing switch's state. */
   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   /* switch_mask_default accumulates the lanes matched by any CASE;
    * its complement is what DEFAULT will execute. */
   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}
    554 
/*
 * Emit a TGSI ENDSWITCH. If a DEFAULT statement was deferred (its pc was
 * recorded in ctx->switch_pc by lp_exec_default), rewind and execute it
 * now with the proper mask; otherwise pop the enclosing switch's state.
 */
static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   /* Levels beyond the nesting limit were never really entered
    * (see lp_exec_switch). */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* check if there's deferred default if so do it now */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* Default executes every lane NOT matched by any CASE. */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      /* Rewind execution to just after the DEFAULT opcode. */
      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * re-purpose switch_pc to point to here again, since we stop execution of
       * the deferred default after next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }

   else if (ctx->switch_pc && ctx->switch_in_default) {
      /* Second pass: we just finished re-executing the deferred default. */
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   /* Pop the enclosing switch's state (pushed by lp_exec_switch). */
   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
    606 
    607 static void lp_exec_case(struct lp_exec_mask *mask,
    608                          LLVMValueRef caseval)
    609 {
    610    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    611    struct function_ctx *ctx = func_ctx(mask);
    612 
    613    LLVMValueRef casemask, prevmask;
    614 
    615    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
    616       return;
    617    }
    618 
    619    /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
    620    if (!ctx->switch_in_default) {
    621       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
    622       casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
    623       ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
    624                                              ctx->switch_mask_default, "sw_default_mask");
    625       casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
    626       mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
    627 
    628       lp_exec_mask_update(mask);
    629    }
    630 }
    631 
    632 /*
    633  * Analyse default statement in a switch.
    634  * \return true if default is last statement, false otherwise
    635  * \param default_pc_start contains pc of instruction to jump to
    636  *                         if default wasn't last but there's no
    637  *                         fallthrough into default.
    638  */
    639 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
    640                                        struct lp_build_tgsi_context * bld_base,
    641                                        int *default_pc_start)
    642 {
    643    unsigned pc = bld_base->pc;
    644    struct function_ctx *ctx = func_ctx(mask);
    645    int curr_switch_stack = ctx->switch_stack_size;
    646 
    647    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
    648       return false;
    649    }
    650 
    651    /* skip over case statements which are together with default */
    652    while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
    653       pc++;
    654    }
    655 
    656    while (pc != ~0u && pc < bld_base->num_instructions) {
    657       unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
    658       switch (opcode) {
    659       case TGSI_OPCODE_CASE:
    660          if (curr_switch_stack == ctx->switch_stack_size) {
    661             *default_pc_start = pc - 1;
    662             return false;
    663          }
    664          break;
    665       case TGSI_OPCODE_SWITCH:
    666          curr_switch_stack++;
    667          break;
    668       case TGSI_OPCODE_ENDSWITCH:
    669          if (curr_switch_stack == ctx->switch_stack_size) {
    670             *default_pc_start = pc - 1;
    671             return true;
    672          }
    673          curr_switch_stack--;
    674          break;
    675       }
    676       pc++;
    677    }
    678    /* should never arrive here */
    679    assert(0);
    680    return true;
    681 }
    682 
/*
 * Emit a TGSI DEFAULT. If default is the last statement of the switch its
 * mask can be computed right away; otherwise its execution is deferred to
 * ENDSWITCH time (recorded via ctx->switch_pc), possibly skipping ahead
 * to the next case first.
 */
static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   /* Levels beyond the nesting limit were never really entered. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not be always at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is last statement in switch (note that case statements appearing
    * "at the same time" as default don't change that) everything is just fine,
    * update switch mask and go on. This means we can handle default with
    * fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* Default runs every lane NOT matched by any CASE... */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      /* ...plus the lanes falling through from the previous case. */
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, "case" immediately before default isn't really a
       * fallthrough, however we still have to count them as such as we
       * already have updated the masks.
       * If that happens in practice could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not last statement and there was no fallthrough into it,
       * we record the PC and continue execution at next case (again, those
       * case encountered at the same time don't count). At endswitch
       * time, we update switchmask, and go back executing the code we skipped
       * until the next break (possibly re-executing some code with changed mask
       * if there was a fallthrough out of default).
       * Finally, if it is not last statement and there was a fallthrough into it,
       * do the same as with the former case, except instead of skipping the code
       * just execute it without updating the mask, then go back and re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}
    747 
    748 
    749 /* stores val into an address pointed to by dst_ptr.
    750  * mask->exec_mask is used to figure out which bits of val
    751  * should be stored into the address
    752  * (0 means don't store this bit, 1 means do store).
    753  */
    754 static void lp_exec_mask_store(struct lp_exec_mask *mask,
    755                                struct lp_build_context *bld_store,
    756                                LLVMValueRef pred,
    757                                LLVMValueRef val,
    758                                LLVMValueRef dst_ptr)
    759 {
    760    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    761 
    762    assert(lp_check_value(bld_store->type, val));
    763    assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
    764    assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));
    765 
    766    /* Mix the predicate and execution mask */
    767    if (mask->has_mask) {
    768       if (pred) {
    769          pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
    770       } else {
    771          pred = mask->exec_mask;
    772       }
    773    }
    774 
    775    if (pred) {
    776       LLVMValueRef res, dst;
    777 
    778       dst = LLVMBuildLoad(builder, dst_ptr, "");
    779       res = lp_build_select(bld_store, pred, val, dst);
    780       LLVMBuildStore(builder, res, dst_ptr);
    781    } else
    782       LLVMBuildStore(builder, val, dst_ptr);
    783 }
    784 
    785 static void lp_exec_mask_call(struct lp_exec_mask *mask,
    786                               int func,
    787                               int *pc)
    788 {
    789    if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
    790       return;
    791    }
    792 
    793    lp_exec_mask_function_init(mask, mask->function_stack_size);
    794    mask->function_stack[mask->function_stack_size].pc = *pc;
    795    mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
    796    mask->function_stack_size++;
    797    *pc = func;
    798 }
    799 
    800 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
    801 {
    802    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    803    struct function_ctx *ctx = func_ctx(mask);
    804    LLVMValueRef exec_mask;
    805 
    806    if (ctx->cond_stack_size == 0 &&
    807        ctx->loop_stack_size == 0 &&
    808        ctx->switch_stack_size == 0 &&
    809        mask->function_stack_size == 1) {
    810       /* returning from main() */
    811       *pc = -1;
    812       return;
    813    }
    814 
    815    if (mask->function_stack_size == 1) {
    816       /*
    817        * This requires special handling since we need to ensure
    818        * we don't drop the mask even if we have no call stack
    819        * (e.g. after a ret in a if clause after the endif)
    820        */
    821       mask->ret_in_main = TRUE;
    822    }
    823 
    824    exec_mask = LLVMBuildNot(builder,
    825                             mask->exec_mask,
    826                             "ret");
    827 
    828    mask->ret_mask = LLVMBuildAnd(builder,
    829                                  mask->ret_mask,
    830                                  exec_mask, "ret_full");
    831 
    832    lp_exec_mask_update(mask);
    833 }
    834 
/* BGNSUB needs no mask work: the function context was already pushed
 * by lp_exec_mask_call when the CAL instruction executed.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
    838 
    839 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
    840 {
    841    struct function_ctx *ctx;
    842 
    843    assert(mask->function_stack_size > 1);
    844    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
    845 
    846    ctx = func_ctx(mask);
    847    mask->function_stack_size--;
    848 
    849    *pc = ctx->pc;
    850    mask->ret_mask = ctx->ret_mask;
    851 
    852    lp_exec_mask_update(mask);
    853 }
    854 
    855 
    856 static LLVMValueRef
    857 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
    858              unsigned file,
    859              int index,
    860              unsigned chan)
    861 {
    862    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    863    LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
    864    LLVMValueRef var_of_array;
    865 
    866    switch (file) {
    867    case TGSI_FILE_TEMPORARY:
    868       array_of_vars = bld->temps;
    869       var_of_array = bld->temps_array;
    870       break;
    871    case TGSI_FILE_OUTPUT:
    872       array_of_vars = bld->outputs;
    873       var_of_array = bld->outputs_array;
    874       break;
    875    default:
    876       assert(0);
    877       return NULL;
    878    }
    879 
    880    assert(chan < 4);
    881 
    882    if (bld->indirect_files & (1 << file)) {
    883       LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
    884       return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
    885    }
    886    else {
    887       assert(index <= bld->bld_base.info->file_max[file]);
    888       return array_of_vars[index][chan];
    889    }
    890 }
    891 
    892 
    893 /**
    894  * Return pointer to a temporary register channel (src or dest).
    895  * Note that indirect addressing cannot be handled here.
    896  * \param index  which temporary register
    897  * \param chan  which channel of the temp register.
    898  */
    899 LLVMValueRef
    900 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
    901              unsigned index,
    902              unsigned chan)
    903 {
    904    return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
    905 }
    906 
    907 /**
    908  * Return pointer to a output register channel (src or dest).
    909  * Note that indirect addressing cannot be handled here.
    910  * \param index  which output register
    911  * \param chan  which channel of the output register.
    912  */
    913 LLVMValueRef
    914 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
    915                unsigned index,
    916                unsigned chan)
    917 {
    918    return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
    919 }
    920 
    921 /*
    922  * If we have indirect addressing in outputs copy our alloca array
    923  * to the outputs slots specified by the caller to make sure
    924  * our outputs are delivered consistently via the same interface.
    925  */
    926 static void
    927 gather_outputs(struct lp_build_tgsi_soa_context * bld)
    928 {
    929    if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
    930       unsigned index, chan;
    931       assert(bld->bld_base.info->num_outputs <=
    932              bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
    933       for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
    934          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
    935             bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
    936          }
    937       }
    938    }
    939 }
    940 
    941 /**
    942  * Gather vector.
    943  * XXX the lp_build_gather() function should be capable of doing this
    944  * with a little work.
    945  */
    946 static LLVMValueRef
    947 build_gather(struct lp_build_tgsi_context *bld_base,
    948              LLVMValueRef base_ptr,
    949              LLVMValueRef indexes,
    950              LLVMValueRef overflow_mask,
    951              LLVMValueRef indexes2)
    952 {
    953    struct gallivm_state *gallivm = bld_base->base.gallivm;
    954    LLVMBuilderRef builder = gallivm->builder;
    955    struct lp_build_context *uint_bld = &bld_base->uint_bld;
    956    struct lp_build_context *bld = &bld_base->base;
    957    LLVMValueRef res;
    958    unsigned i;
    959 
    960    if (indexes2)
    961       res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
    962    else
    963       res = bld->undef;
    964    /*
    965     * overflow_mask is a vector telling us which channels
    966     * in the vector overflowed. We use the overflow behavior for
    967     * constant buffers which is defined as:
    968     * Out of bounds access to constant buffer returns 0 in all
    969     * components. Out of bounds behavior is always with respect
    970     * to the size of the buffer bound at that slot.
    971     */
    972 
    973    if (overflow_mask) {
    974       /*
    975        * We avoid per-element control flow here (also due to llvm going crazy,
    976        * though I suspect it's better anyway since overflow is likely rare).
    977        * Note that since we still fetch from buffers even if num_elements was
    978        * zero (in this case we'll fetch from index zero) the jit func callers
    979        * MUST provide valid fake constant buffers of size 4x32 (the values do
    980        * not matter), otherwise we'd still need (not per element though)
    981        * control flow.
    982        */
    983       indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
    984       if (indexes2)
    985          indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
    986    }
    987 
    988    /*
    989     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    990     */
    991    for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
    992       LLVMValueRef si, di;
    993       LLVMValueRef index;
    994       LLVMValueRef scalar_ptr, scalar;
    995 
    996       di = lp_build_const_int32(bld->gallivm, i);
    997       if (indexes2)
    998          si = lp_build_const_int32(bld->gallivm, i >> 1);
    999       else
   1000          si = di;
   1001 
   1002       if (indexes2 && (i & 1)) {
   1003          index = LLVMBuildExtractElement(builder,
   1004                                          indexes2, si, "");
   1005       } else {
   1006          index = LLVMBuildExtractElement(builder,
   1007                                          indexes, si, "");
   1008       }
   1009       scalar_ptr = LLVMBuildGEP(builder, base_ptr,
   1010                                 &index, 1, "gather_ptr");
   1011       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
   1012 
   1013       res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   1014    }
   1015 
   1016    if (overflow_mask) {
   1017       if (indexes2) {
   1018          res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
   1019          overflow_mask = LLVMBuildSExt(builder, overflow_mask,
   1020                                        bld_base->dbl_bld.int_vec_type, "");
   1021          res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
   1022                                bld_base->dbl_bld.zero, res);
   1023       } else
   1024          res = lp_build_select(bld, overflow_mask, bld->zero, res);
   1025    }
   1026 
   1027    return res;
   1028 }
   1029 
   1030 
   1031 /**
   1032  * Scatter/store vector.
   1033  */
   1034 static void
   1035 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
   1036                   LLVMValueRef base_ptr,
   1037                   LLVMValueRef indexes,
   1038                   LLVMValueRef values,
   1039                   struct lp_exec_mask *mask,
   1040                   LLVMValueRef pred)
   1041 {
   1042    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   1043    LLVMBuilderRef builder = gallivm->builder;
   1044    unsigned i;
   1045 
   1046    /* Mix the predicate and execution mask */
   1047    if (mask->has_mask) {
   1048       if (pred) {
   1049          pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
   1050       }
   1051       else {
   1052          pred = mask->exec_mask;
   1053       }
   1054    }
   1055 
   1056    /*
   1057     * Loop over elements of index_vec, store scalar value.
   1058     */
   1059    for (i = 0; i < bld->bld_base.base.type.length; i++) {
   1060       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
   1061       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
   1062       LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
   1063       LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
   1064       LLVMValueRef scalar_pred = pred ?
   1065          LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
   1066 
   1067       if (0)
   1068          lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
   1069                          ii, val, index, scalar_ptr);
   1070 
   1071       if (scalar_pred) {
   1072          LLVMValueRef real_val, dst_val;
   1073          dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
   1074          real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
   1075          LLVMBuildStore(builder, real_val, scalar_ptr);
   1076       }
   1077       else {
   1078          LLVMBuildStore(builder, val, scalar_ptr);
   1079       }
   1080    }
   1081 }
   1082 
   1083 
   1084 /**
   1085  * Read the current value of the ADDR register, convert the floats to
   1086  * ints, add the base index and return the vector of offsets.
   1087  * The offsets will be used to index into the constant buffer or
   1088  * temporary register file.
   1089  */
   1090 static LLVMValueRef
   1091 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
   1092                    unsigned reg_file, unsigned reg_index,
   1093                    const struct tgsi_ind_register *indirect_reg)
   1094 {
   1095    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   1096    struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   1097    /* always use X component of address register */
   1098    unsigned swizzle = indirect_reg->Swizzle;
   1099    LLVMValueRef base;
   1100    LLVMValueRef rel;
   1101    LLVMValueRef max_index;
   1102    LLVMValueRef index;
   1103 
   1104    assert(bld->indirect_files & (1 << reg_file));
   1105 
   1106    base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
   1107 
   1108    assert(swizzle < 4);
   1109    switch (indirect_reg->File) {
   1110    case TGSI_FILE_ADDRESS:
   1111       rel = LLVMBuildLoad(builder,
   1112                           bld->addr[indirect_reg->Index][swizzle],
   1113                           "load addr reg");
   1114       /* ADDR LLVM values already have LLVM integer type. */
   1115       break;
   1116    case TGSI_FILE_TEMPORARY:
   1117       rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
   1118       rel = LLVMBuildLoad(builder, rel, "load temp reg");
   1119       /* TEMP LLVM values always have LLVM float type, but for indirection, the
   1120        * value actually stored is expected to be an integer */
   1121       rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
   1122       break;
   1123    default:
   1124       assert(0);
   1125       rel = uint_bld->zero;
   1126    }
   1127 
   1128    index = lp_build_add(uint_bld, base, rel);
   1129 
   1130    /*
   1131     * emit_fetch_constant handles constant buffer overflow so this code
   1132     * is pointless for them.
   1133     * Furthermore the D3D10 spec in section 6.5 says:
   1134     * If the constant buffer bound to a slot is larger than the size
   1135     * declared in the shader for that slot, implementations are allowed
   1136     * to return incorrect data (not necessarily 0) for indices that are
   1137     * larger than the declared size but smaller than the buffer size.
   1138     */
   1139    if (reg_file != TGSI_FILE_CONSTANT) {
   1140       max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
   1141                                          uint_bld->type,
   1142                                          bld->bld_base.info->file_max[reg_file]);
   1143 
   1144       assert(!uint_bld->type.sign);
   1145       index = lp_build_min(uint_bld, index, max_index);
   1146    }
   1147 
   1148    return index;
   1149 }
   1150 
   1151 static struct lp_build_context *
   1152 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
   1153 	       enum tgsi_opcode_type stype)
   1154 {
   1155    struct lp_build_context *bld_fetch;
   1156 
   1157    switch (stype) {
   1158    case TGSI_TYPE_FLOAT:
   1159    case TGSI_TYPE_UNTYPED:
   1160       bld_fetch = &bld_base->base;
   1161       break;
   1162    case TGSI_TYPE_UNSIGNED:
   1163       bld_fetch = &bld_base->uint_bld;
   1164       break;
   1165    case TGSI_TYPE_SIGNED:
   1166       bld_fetch = &bld_base->int_bld;
   1167       break;
   1168    case TGSI_TYPE_DOUBLE:
   1169       bld_fetch = &bld_base->dbl_bld;
   1170       break;
   1171    case TGSI_TYPE_UNSIGNED64:
   1172       bld_fetch = &bld_base->uint64_bld;
   1173       break;
   1174    case TGSI_TYPE_SIGNED64:
   1175       bld_fetch = &bld_base->int64_bld;
   1176       break;
   1177    case TGSI_TYPE_VOID:
   1178    default:
   1179       assert(0);
   1180       bld_fetch = NULL;
   1181       break;
   1182    }
   1183    return bld_fetch;
   1184 }
   1185 
   1186 static LLVMValueRef
   1187 get_soa_array_offsets(struct lp_build_context *uint_bld,
   1188                       LLVMValueRef indirect_index,
   1189                       unsigned chan_index,
   1190                       boolean need_perelement_offset)
   1191 {
   1192    struct gallivm_state *gallivm = uint_bld->gallivm;
   1193    LLVMValueRef chan_vec =
   1194       lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
   1195    LLVMValueRef length_vec =
   1196       lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
   1197    LLVMValueRef index_vec;
   1198 
   1199    /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
   1200    index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
   1201    index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
   1202    index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
   1203 
   1204    if (need_perelement_offset) {
   1205       LLVMValueRef pixel_offsets;
   1206       unsigned i;
   1207      /* build pixel offset vector: {0, 1, 2, 3, ...} */
   1208       pixel_offsets = uint_bld->undef;
   1209       for (i = 0; i < uint_bld->type.length; i++) {
   1210          LLVMValueRef ii = lp_build_const_int32(gallivm, i);
   1211          pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
   1212                                                 ii, ii, "");
   1213       }
   1214       index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
   1215    }
   1216    return index_vec;
   1217 }
   1218 
/**
 * Fetch one channel of a constant-buffer source register as an SoA vector.
 * Handles 2D constants (Dimension selects the buffer slot), indirect
 * addressing (gather with out-of-bounds lanes forced to zero), and 64-bit
 * types (fetched from two consecutive 32-bit channels).
 * \param reg      source register descriptor
 * \param stype    TGSI value type of the fetch
 * \param swizzle  which channel (0-3) of the register to fetch
 */
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0u);

   if (reg->Register.Dimension) {
      /* 2D constant: the first dimension picks the buffer slot */
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;
      LLVMValueRef index_vec2 = NULL;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      if (tgsi_type_is_64bit(stype)) {
         /* second index vector for the high 32 bits (next channel) */
         LLVMValueRef swizzle_vec2;
         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1);
         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
      }
      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
   }
   else {
      /* Direct fetch: load one scalar and broadcast it to all lanes. */
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;
      struct lp_build_context *bld_broad = &bld_base->base;
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");
      /* For 64-bit types reinterpret the pointer so one load pulls in
       * both 32-bit halves. */
      if (stype == TGSI_TYPE_DOUBLE) {
         LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
         bld_broad = &bld_base->dbl_bld;
      } else if (stype == TGSI_TYPE_UNSIGNED64) {
         LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
         bld_broad = &bld_base->uint64_bld;
      } else if (stype == TGSI_TYPE_SIGNED64) {
         LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
         bld_broad = &bld_base->int64_bld;
      }
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
      res = lp_build_broadcast_scalar(bld_broad, scalar);
   }

   /* Reinterpret the (float-typed) result as the requested type. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
   1314 
   1315 /**
   1316  * Fetch 64-bit values from two separate channels.
   1317  * 64-bit values are stored split across two channels, like xy and zw.
   1318  * This function creates a set of 16 floats,
   1319  * extracts the values from the two channels,
   1320  * puts them in the correct place, then casts to 8 64-bits.
   1321  */
   1322 static LLVMValueRef
   1323 emit_fetch_64bit(
   1324    struct lp_build_tgsi_context * bld_base,
   1325    enum tgsi_opcode_type stype,
   1326    LLVMValueRef input,
   1327    LLVMValueRef input2)
   1328 {
   1329    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1330    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   1331    LLVMBuilderRef builder = gallivm->builder;
   1332    LLVMValueRef res;
   1333    struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
   1334    int i;
   1335    LLVMValueRef shuffles[16];
   1336    int len = bld_base->base.type.length * 2;
   1337    assert(len <= 16);
   1338 
   1339    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
   1340       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
   1341       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
   1342    }
   1343    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
   1344 
   1345    return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   1346 }
   1347 
/**
 * Fetch one channel of an immediate as an SoA vector.
 * Immediates either live in per-register LLVM values (bld->immediates)
 * or, when indirect addressing is used or use_immediates_array is set,
 * in an alloca'd array that is loaded/gathered from here.
 * 64-bit types combine the channel with the following one.
 * \param swizzle  which channel (0-3) of the immediate to fetch
 */
static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec;  /* index into the immediate register array */
         LLVMValueRef index_vec2 = NULL;
         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);
         if (tgsi_type_is_64bit(stype))
            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                              indirect_index,
                                              swizzle + 1,
                                              FALSE);
         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
      } else {
         /* direct load from the immediates array (4 channels/register) */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                        reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr =  LLVMBuildGEP(builder,
                                                bld->imms_array, &lindex, 1, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");

         if (tgsi_type_is_64bit(stype)) {
            /* load the adjacent channel holding the high 32 bits */
            LLVMValueRef lindex1;
            LLVMValueRef imms_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle + 1);
            imms_ptr2 = LLVMBuildGEP(builder,
                                      bld->imms_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
   }
   else {
      res = bld->immediates[reg->Register.Index][swizzle];
      if (tgsi_type_is_64bit(stype))
         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
   }

   /* Reinterpret the (float-typed) result as the requested type. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}
   1426 
/**
 * Fetch one channel of an input register as an SoA vector.
 * Inputs come either from per-register LLVM values (bld->inputs) or,
 * when the INPUT file is indirectly addressed, from the inputs alloca
 * array (loaded directly or gathered per lane).
 * 64-bit types combine the channel with the following one.
 * \param swizzle  which channel (0-3) of the input to fetch
 */
static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec;  /* index into the input reg array */
      LLVMValueRef index_vec2 = NULL;
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* per-lane offsets (inputs may differ per pixel, hence TRUE) */
      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle + 1,
                                           TRUE);
      }
      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         /* direct index, but inputs live in the flat alloca array */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                        reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");

         res = LLVMBuildLoad(builder, input_ptr, "");
         if (tgsi_type_is_64bit(stype)) {
            /* load the adjacent channel holding the high 32 bits */
            LLVMValueRef lindex1;
            LLVMValueRef input_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle + 1);
            input_ptr2 = LLVMBuildGEP(builder,
                                      bld->inputs_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, input_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
      else {
         res = bld->inputs[reg->Register.Index][swizzle];
         if (tgsi_type_is_64bit(stype))
            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
      }
   }

   assert(res);

   /* Reinterpret the (float-typed) result as the requested type. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
   1504 
   1505 
   1506 static LLVMValueRef
   1507 emit_fetch_gs_input(
   1508    struct lp_build_tgsi_context * bld_base,
   1509    const struct tgsi_full_src_register * reg,
   1510    enum tgsi_opcode_type stype,
   1511    unsigned swizzle)
   1512 {
   1513    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1514    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   1515    const struct tgsi_shader_info *info = bld->bld_base.info;
   1516    LLVMBuilderRef builder = gallivm->builder;
   1517    LLVMValueRef attrib_index = NULL;
   1518    LLVMValueRef vertex_index = NULL;
   1519    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   1520    LLVMValueRef res;
   1521 
   1522    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
   1523       /* This is really a system value not a regular input */
   1524       assert(!reg->Register.Indirect);
   1525       assert(!reg->Dimension.Indirect);
   1526       res = bld->system_values.prim_id;
   1527       if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
   1528          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
   1529       }
   1530       return res;
   1531    }
   1532 
   1533    if (reg->Register.Indirect) {
   1534       attrib_index = get_indirect_index(bld,
   1535                                         reg->Register.File,
   1536                                         reg->Register.Index,
   1537                                         &reg->Indirect);
   1538    } else {
   1539       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   1540    }
   1541 
   1542    if (reg->Dimension.Indirect) {
   1543       vertex_index = get_indirect_index(bld,
   1544                                         reg->Register.File,
   1545                                         reg->Dimension.Index,
   1546                                         &reg->DimIndirect);
   1547    } else {
   1548       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   1549    }
   1550 
   1551    res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
   1552                                     reg->Dimension.Indirect,
   1553                                     vertex_index,
   1554                                     reg->Register.Indirect,
   1555                                     attrib_index,
   1556                                     swizzle_index);
   1557 
   1558    assert(res);
   1559    if (tgsi_type_is_64bit(stype)) {
   1560       LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle + 1);
   1561       LLVMValueRef res2;
   1562       res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
   1563                                         reg->Dimension.Indirect,
   1564                                         vertex_index,
   1565                                         reg->Register.Indirect,
   1566                                         attrib_index,
   1567                                         swizzle_index);
   1568       assert(res2);
   1569       res = emit_fetch_64bit(bld_base, stype, res, res2);
   1570    } else if (stype == TGSI_TYPE_UNSIGNED) {
   1571       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   1572    } else if (stype == TGSI_TYPE_SIGNED) {
   1573       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   1574    }
   1575 
   1576    return res;
   1577 }
   1578 
   1579 static LLVMValueRef
   1580 emit_fetch_temporary(
   1581    struct lp_build_tgsi_context * bld_base,
   1582    const struct tgsi_full_src_register * reg,
   1583    enum tgsi_opcode_type stype,
   1584    unsigned swizzle)
   1585 {
   1586    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1587    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   1588    LLVMBuilderRef builder = gallivm->builder;
   1589    LLVMValueRef res;
   1590 
   1591    if (reg->Register.Indirect) {
   1592       LLVMValueRef indirect_index;
   1593       LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
   1594       LLVMValueRef temps_array;
   1595       LLVMTypeRef fptr_type;
   1596 
   1597       indirect_index = get_indirect_index(bld,
   1598                                           reg->Register.File,
   1599                                           reg->Register.Index,
   1600                                           &reg->Indirect);
   1601 
   1602       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
   1603                                         indirect_index,
   1604                                         swizzle,
   1605                                         TRUE);
   1606       if (tgsi_type_is_64bit(stype)) {
   1607                index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
   1608                                                   indirect_index,
   1609                                                   swizzle + 1,
   1610                                                   TRUE);
   1611       }
   1612 
   1613       /* cast temps_array pointer to float* */
   1614       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
   1615       temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
   1616 
   1617       /* Gather values from the temporary register array */
   1618       res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
   1619    }
   1620    else {
   1621       LLVMValueRef temp_ptr;
   1622       temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
   1623       res = LLVMBuildLoad(builder, temp_ptr, "");
   1624 
   1625       if (tgsi_type_is_64bit(stype)) {
   1626          LLVMValueRef temp_ptr2, res2;
   1627 
   1628          temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
   1629          res2 = LLVMBuildLoad(builder, temp_ptr2, "");
   1630          res = emit_fetch_64bit(bld_base, stype, res, res2);
   1631       }
   1632    }
   1633 
   1634    if (stype == TGSI_TYPE_SIGNED ||
   1635        stype == TGSI_TYPE_UNSIGNED ||
   1636        stype == TGSI_TYPE_DOUBLE ||
   1637        stype == TGSI_TYPE_SIGNED64 ||
   1638        stype == TGSI_TYPE_UNSIGNED64) {
   1639       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
   1640       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   1641    }
   1642 
   1643    return res;
   1644 }
   1645 
   1646 static LLVMValueRef
   1647 emit_fetch_system_value(
   1648    struct lp_build_tgsi_context * bld_base,
   1649    const struct tgsi_full_src_register * reg,
   1650    enum tgsi_opcode_type stype,
   1651    unsigned swizzle)
   1652 {
   1653    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1654    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   1655    const struct tgsi_shader_info *info = bld->bld_base.info;
   1656    LLVMBuilderRef builder = gallivm->builder;
   1657    LLVMValueRef res;
   1658    enum tgsi_opcode_type atype; // Actual type of the value
   1659 
   1660    assert(!reg->Register.Indirect);
   1661 
   1662    switch (info->system_value_semantic_name[reg->Register.Index]) {
   1663    case TGSI_SEMANTIC_INSTANCEID:
   1664       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
   1665       atype = TGSI_TYPE_UNSIGNED;
   1666       break;
   1667 
   1668    case TGSI_SEMANTIC_VERTEXID:
   1669       res = bld->system_values.vertex_id;
   1670       atype = TGSI_TYPE_UNSIGNED;
   1671       break;
   1672 
   1673    case TGSI_SEMANTIC_VERTEXID_NOBASE:
   1674       res = bld->system_values.vertex_id_nobase;
   1675       atype = TGSI_TYPE_UNSIGNED;
   1676       break;
   1677 
   1678    case TGSI_SEMANTIC_BASEVERTEX:
   1679       res = bld->system_values.basevertex;
   1680       atype = TGSI_TYPE_UNSIGNED;
   1681       break;
   1682 
   1683    case TGSI_SEMANTIC_PRIMID:
   1684       res = bld->system_values.prim_id;
   1685       atype = TGSI_TYPE_UNSIGNED;
   1686       break;
   1687 
   1688    case TGSI_SEMANTIC_INVOCATIONID:
   1689       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
   1690       atype = TGSI_TYPE_UNSIGNED;
   1691       break;
   1692 
   1693    default:
   1694       assert(!"unexpected semantic in emit_fetch_system_value");
   1695       res = bld_base->base.zero;
   1696       atype = TGSI_TYPE_FLOAT;
   1697       break;
   1698    }
   1699 
   1700    if (atype != stype) {
   1701       if (stype == TGSI_TYPE_FLOAT) {
   1702          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
   1703       } else if (stype == TGSI_TYPE_UNSIGNED) {
   1704          res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   1705       } else if (stype == TGSI_TYPE_SIGNED) {
   1706          res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   1707       }
   1708    }
   1709 
   1710    return res;
   1711 }
   1712 
   1713 /**
   1714  * Register fetch with derivatives.
   1715  */
   1716 static void
   1717 emit_fetch_deriv(
   1718    struct lp_build_tgsi_soa_context *bld,
   1719    LLVMValueRef src,
   1720    LLVMValueRef *res,
   1721    LLVMValueRef *ddx,
   1722    LLVMValueRef *ddy)
   1723 {
   1724    if (res)
   1725       *res = src;
   1726 
   1727    /* TODO: use interpolation coeffs for inputs */
   1728 
   1729    if (ddx)
   1730       *ddx = lp_build_ddx(&bld->bld_base.base, src);
   1731 
   1732    if (ddy)
   1733       *ddy = lp_build_ddy(&bld->bld_base.base, src);
   1734 }
   1735 
   1736 
   1737 /**
   1738  * Predicate.
   1739  */
   1740 static void
   1741 emit_fetch_predicate(
   1742    struct lp_build_tgsi_soa_context *bld,
   1743    const struct tgsi_full_instruction *inst,
   1744    LLVMValueRef *pred)
   1745 {
   1746    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   1747    unsigned index;
   1748    unsigned char swizzles[4];
   1749    LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   1750    LLVMValueRef value;
   1751    unsigned chan;
   1752 
   1753    if (!inst->Instruction.Predicate) {
   1754       TGSI_FOR_EACH_CHANNEL( chan ) {
   1755          pred[chan] = NULL;
   1756       }
   1757       return;
   1758    }
   1759 
   1760    swizzles[0] = inst->Predicate.SwizzleX;
   1761    swizzles[1] = inst->Predicate.SwizzleY;
   1762    swizzles[2] = inst->Predicate.SwizzleZ;
   1763    swizzles[3] = inst->Predicate.SwizzleW;
   1764 
   1765    index = inst->Predicate.Index;
   1766    assert(index < LP_MAX_TGSI_PREDS);
   1767 
   1768    TGSI_FOR_EACH_CHANNEL( chan ) {
   1769       unsigned swizzle = swizzles[chan];
   1770 
   1771       /*
   1772        * Only fetch the predicate register channels that are actually listed
   1773        * in the swizzles
   1774        */
   1775       if (!unswizzled[swizzle]) {
   1776          value = LLVMBuildLoad(builder,
   1777                                bld->preds[index][swizzle], "");
   1778 
   1779          /*
   1780           * Convert the value to an integer mask.
   1781           *
   1782           * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
   1783           * is needlessly causing two comparisons due to storing the intermediate
   1784           * result as float vector instead of an integer mask vector.
   1785           */
   1786          value = lp_build_compare(bld->bld_base.base.gallivm,
   1787                                   bld->bld_base.base.type,
   1788                                   PIPE_FUNC_NOTEQUAL,
   1789                                   value,
   1790                                   bld->bld_base.base.zero);
   1791          if (inst->Predicate.Negate) {
   1792             value = LLVMBuildNot(builder, value, "");
   1793          }
   1794 
   1795          unswizzled[swizzle] = value;
   1796       } else {
   1797          value = unswizzled[swizzle];
   1798       }
   1799 
   1800       pred[chan] = value;
   1801    }
   1802 }
   1803 
   1804 /**
   1805  * store an array of 8 64-bit into two arrays of 8 floats
   1806  * i.e.
   1807  * value is d0, d1, d2, d3 etc.
   1808  * each 64-bit has high and low pieces x, y
   1809  * so gets stored into the separate channels as:
   1810  * chan_ptr = d0.x, d1.x, d2.x, d3.x
   1811  * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
   1812  */
   1813 static void
   1814 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
   1815                       LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
   1816                       LLVMValueRef pred,
   1817                       LLVMValueRef value)
   1818 {
   1819    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1820    struct gallivm_state *gallivm = bld_base->base.gallivm;
   1821    LLVMBuilderRef builder = gallivm->builder;
   1822    struct lp_build_context *float_bld = &bld_base->base;
   1823    unsigned i;
   1824    LLVMValueRef temp, temp2;
   1825    LLVMValueRef shuffles[8];
   1826    LLVMValueRef shuffles2[8];
   1827 
   1828    for (i = 0; i < bld_base->base.type.length; i++) {
   1829       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
   1830       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
   1831    }
   1832 
   1833    temp = LLVMBuildShuffleVector(builder, value,
   1834                                  LLVMGetUndef(LLVMTypeOf(value)),
   1835                                  LLVMConstVector(shuffles,
   1836                                                  bld_base->base.type.length),
   1837                                  "");
   1838    temp2 = LLVMBuildShuffleVector(builder, value,
   1839                                   LLVMGetUndef(LLVMTypeOf(value)),
   1840                                   LLVMConstVector(shuffles2,
   1841                                                   bld_base->base.type.length),
   1842                                   "");
   1843 
   1844    lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp, chan_ptr);
   1845    lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp2, chan_ptr2);
   1846 }
   1847 
   1848 /**
   1849  * Register store.
   1850  */
   1851 static void
   1852 emit_store_chan(
   1853    struct lp_build_tgsi_context *bld_base,
   1854    const struct tgsi_full_instruction *inst,
   1855    unsigned index,
   1856    unsigned chan_index,
   1857    LLVMValueRef pred,
   1858    LLVMValueRef value)
   1859 {
   1860    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1861    struct gallivm_state *gallivm = bld_base->base.gallivm;
   1862    LLVMBuilderRef builder = gallivm->builder;
   1863    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   1864    struct lp_build_context *float_bld = &bld_base->base;
   1865    struct lp_build_context *int_bld = &bld_base->int_bld;
   1866    LLVMValueRef indirect_index = NULL;
   1867    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
   1868 
   1869    /*
   1870     * Apply saturation.
   1871     *
   1872     * It is always assumed to be float.
   1873     */
   1874    if (inst->Instruction.Saturate) {
   1875       assert(dtype == TGSI_TYPE_FLOAT ||
   1876              dtype == TGSI_TYPE_UNTYPED);
   1877       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
   1878       value = lp_build_clamp_zero_one_nanzero(float_bld, value);
   1879    }
   1880 
   1881    if (reg->Register.Indirect) {
   1882       /*
   1883        * Currently the mesa/st doesn't generate indirect stores
   1884        * to 64-bit values, it normally uses MOV to do indirect stores.
   1885        */
   1886       assert(!tgsi_type_is_64bit(dtype));
   1887       indirect_index = get_indirect_index(bld,
   1888                                           reg->Register.File,
   1889                                           reg->Register.Index,
   1890                                           &reg->Indirect);
   1891    } else {
   1892       assert(reg->Register.Index <=
   1893                              bld_base->info->file_max[reg->Register.File]);
   1894    }
   1895 
   1896    if (DEBUG_EXECUTION) {
   1897       emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
   1898    }
   1899 
   1900    switch( reg->Register.File ) {
   1901    case TGSI_FILE_OUTPUT:
   1902       /* Outputs are always stored as floats */
   1903       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
   1904 
   1905       if (reg->Register.Indirect) {
   1906          LLVMValueRef index_vec;  /* indexes into the output registers */
   1907          LLVMValueRef outputs_array;
   1908          LLVMTypeRef fptr_type;
   1909 
   1910          index_vec = get_soa_array_offsets(&bld_base->uint_bld,
   1911                                            indirect_index,
   1912                                            chan_index,
   1913                                            TRUE);
   1914 
   1915          fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
   1916          outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
   1917 
   1918          /* Scatter store values into output registers */
   1919          emit_mask_scatter(bld, outputs_array, index_vec, value,
   1920                            &bld->exec_mask, pred);
   1921       }
   1922       else {
   1923          LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
   1924                                                   chan_index);
   1925 
   1926          if (tgsi_type_is_64bit(dtype)) {
   1927             LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
   1928                                                       chan_index + 1);
   1929             emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
   1930                                   pred, value);
   1931          } else
   1932             lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
   1933       }
   1934       break;
   1935 
   1936    case TGSI_FILE_TEMPORARY:
   1937       /* Temporaries are always stored as floats */
   1938       if (!tgsi_type_is_64bit(dtype))
   1939          value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
   1940       else
   1941          value = LLVMBuildBitCast(builder, value,  LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
   1942 
   1943       if (reg->Register.Indirect) {
   1944          LLVMValueRef index_vec;  /* indexes into the temp registers */
   1945          LLVMValueRef temps_array;
   1946          LLVMTypeRef fptr_type;
   1947 
   1948          index_vec = get_soa_array_offsets(&bld_base->uint_bld,
   1949                                            indirect_index,
   1950                                            chan_index,
   1951                                            TRUE);
   1952 
   1953          fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
   1954          temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
   1955 
   1956          /* Scatter store values into temp registers */
   1957          emit_mask_scatter(bld, temps_array, index_vec, value,
   1958                            &bld->exec_mask, pred);
   1959       }
   1960       else {
   1961          LLVMValueRef temp_ptr;
   1962          temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
   1963 
   1964          if (tgsi_type_is_64bit(dtype)) {
   1965             LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
   1966                                                          reg->Register.Index,
   1967                                                          chan_index + 1);
   1968             emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
   1969                                   pred, value);
   1970          }
   1971          else
   1972             lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
   1973       }
   1974       break;
   1975 
   1976    case TGSI_FILE_ADDRESS:
   1977       assert(dtype == TGSI_TYPE_SIGNED);
   1978       assert(LLVMTypeOf(value) == int_bld->vec_type);
   1979       value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
   1980       lp_exec_mask_store(&bld->exec_mask, int_bld, pred, value,
   1981                          bld->addr[reg->Register.Index][chan_index]);
   1982       break;
   1983 
   1984    case TGSI_FILE_PREDICATE:
   1985       assert(LLVMTypeOf(value) == float_bld->vec_type);
   1986       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
   1987       lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value,
   1988                          bld->preds[reg->Register.Index][chan_index]);
   1989       break;
   1990 
   1991    default:
   1992       assert( 0 );
   1993    }
   1994 
   1995    (void)dtype;
   1996 }
   1997 
   1998 /*
   1999  * Called at the beginning of the translation of each TGSI instruction, to
   2000  * emit some debug code.
   2001  */
   2002 static void
   2003 emit_debug(
   2004    struct lp_build_tgsi_context * bld_base,
   2005    const struct tgsi_full_instruction * inst,
   2006    const struct tgsi_opcode_info * info)
   2007 
   2008 {
   2009    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   2010 
   2011    if (DEBUG_EXECUTION) {
   2012       /*
   2013        * Dump the TGSI instruction.
   2014        */
   2015 
   2016       struct gallivm_state *gallivm = bld_base->base.gallivm;
   2017       char buf[512];
   2018       buf[0] = '$';
   2019       buf[1] = ' ';
   2020       tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
   2021       lp_build_printf(gallivm, buf);
   2022 
   2023       /* Dump the execution mask.
   2024        */
   2025       if (bld->exec_mask.has_mask) {
   2026          lp_build_print_value(gallivm, "    mask = ", bld->exec_mask.exec_mask);
   2027       }
   2028    }
   2029 }
   2030 
   2031 static void
   2032 emit_store(
   2033    struct lp_build_tgsi_context * bld_base,
   2034    const struct tgsi_full_instruction * inst,
   2035    const struct tgsi_opcode_info * info,
   2036    LLVMValueRef dst[4])
   2037 
   2038 {
   2039    unsigned chan_index;
   2040    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   2041    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
   2042    if(info->num_dst) {
   2043       LLVMValueRef pred[TGSI_NUM_CHANNELS];
   2044 
   2045       emit_fetch_predicate( bld, inst, pred );
   2046 
   2047       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
   2048 
   2049          if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
   2050              continue;
   2051          emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
   2052       }
   2053    }
   2054 }
   2055 
   2056 static unsigned
   2057 tgsi_to_pipe_tex_target(unsigned tgsi_target)
   2058 {
   2059    switch (tgsi_target) {
   2060    case TGSI_TEXTURE_BUFFER:
   2061       return PIPE_BUFFER;
   2062    case TGSI_TEXTURE_1D:
   2063    case TGSI_TEXTURE_SHADOW1D:
   2064       return PIPE_TEXTURE_1D;
   2065    case TGSI_TEXTURE_2D:
   2066    case TGSI_TEXTURE_SHADOW2D:
   2067    case TGSI_TEXTURE_2D_MSAA:
   2068       return PIPE_TEXTURE_2D;
   2069    case TGSI_TEXTURE_3D:
   2070       return PIPE_TEXTURE_3D;
   2071    case TGSI_TEXTURE_CUBE:
   2072    case TGSI_TEXTURE_SHADOWCUBE:
   2073       return PIPE_TEXTURE_CUBE;
   2074    case TGSI_TEXTURE_RECT:
   2075    case TGSI_TEXTURE_SHADOWRECT:
   2076       return PIPE_TEXTURE_RECT;
   2077    case TGSI_TEXTURE_1D_ARRAY:
   2078    case TGSI_TEXTURE_SHADOW1D_ARRAY:
   2079       return PIPE_TEXTURE_1D_ARRAY;
   2080    case TGSI_TEXTURE_2D_ARRAY:
   2081    case TGSI_TEXTURE_SHADOW2D_ARRAY:
   2082    case TGSI_TEXTURE_2D_ARRAY_MSAA:
   2083       return PIPE_TEXTURE_2D_ARRAY;
   2084    case TGSI_TEXTURE_CUBE_ARRAY:
   2085    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
   2086       return PIPE_TEXTURE_CUBE_ARRAY;
   2087    default:
   2088       assert(0);
   2089       return PIPE_BUFFER;
   2090    }
   2091 }
   2092 
   2093 
   2094 static enum lp_sampler_lod_property
   2095 lp_build_lod_property(
   2096    struct lp_build_tgsi_context *bld_base,
   2097    const struct tgsi_full_instruction *inst,
   2098    unsigned src_op)
   2099 {
   2100    const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   2101    enum lp_sampler_lod_property lod_property;
   2102 
   2103    /*
   2104     * Not much we can do here. We could try catching inputs declared
   2105     * with constant interpolation but not sure it's worth it - since for
   2106     * TEX opcodes as well as FETCH/LD the lod comes from same reg as
   2107     * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
   2108     * like the constant/immediate recognition below.
   2109     * What seems to be of more value would be to recognize temps holding
   2110     * broadcasted scalars but no way we can do it.
   2111     * Tried asking llvm but without any success (using LLVMIsConstant
   2112     * even though this isn't exactly what we'd need), even as simple as
   2113     * IMM[0] UINT32 (0,-1,0,0)
   2114     * MOV TEMP[0] IMM[0].yyyy
   2115     * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
   2116     * doesn't work.
   2117     * This means there's ZERO chance this will ever catch a scalar lod
   2118     * with traditional tex opcodes as well as texel fetches, since the lod
   2119     * comes from the same reg as coords (except some test shaders using
   2120     * constant coords maybe).
   2121     * There's at least hope for sample opcodes as well as size queries.
   2122     */
   2123    if (reg->Register.File == TGSI_FILE_CONSTANT ||
   2124        reg->Register.File == TGSI_FILE_IMMEDIATE) {
   2125       lod_property = LP_SAMPLER_LOD_SCALAR;
   2126    }
   2127    else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
   2128       if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
   2129          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   2130       }
   2131       else {
   2132          lod_property = LP_SAMPLER_LOD_PER_QUAD;
   2133       }
   2134    }
   2135    else {
   2136       /* never use scalar (per-quad) lod the results are just too wrong. */
   2137       lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   2138    }
   2139    return lod_property;
   2140 }
   2141 
   2142 
   2143 /**
   2144  * High-level instruction translators.
   2145  */
   2146 
   2147 static void
   2148 emit_tex( struct lp_build_tgsi_soa_context *bld,
   2149           const struct tgsi_full_instruction *inst,
   2150           enum lp_build_tex_modifier modifier,
   2151           LLVMValueRef *texel,
   2152           unsigned sampler_reg,
   2153           enum lp_sampler_op_type sampler_op)
   2154 {
   2155    unsigned unit = inst->Src[sampler_reg].Register.Index;
   2156    LLVMValueRef oow = NULL;
   2157    LLVMValueRef lod = NULL;
   2158    LLVMValueRef coords[5];
   2159    LLVMValueRef offsets[3] = { NULL };
   2160    struct lp_derivatives derivs;
   2161    struct lp_sampler_params params;
   2162    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   2163    unsigned num_derivs, num_offsets, i;
   2164    unsigned shadow_coord = 0;
   2165    unsigned layer_coord = 0;
   2166    unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
   2167 
   2168    memset(&params, 0, sizeof(params));
   2169 
   2170    if (!bld->sampler) {
   2171       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
   2172       for (i = 0; i < 4; i++) {
   2173          texel[i] = bld->bld_base.base.undef;
   2174       }
   2175       return;
   2176    }
   2177 
   2178    switch (inst->Texture.Texture) {
   2179    case TGSI_TEXTURE_1D_ARRAY:
   2180       layer_coord = 1;
   2181       /* fallthrough */
   2182    case TGSI_TEXTURE_1D:
   2183       num_offsets = 1;
   2184       num_derivs = 1;
   2185       break;
   2186    case TGSI_TEXTURE_2D_ARRAY:
   2187       layer_coord = 2;
   2188       /* fallthrough */
   2189    case TGSI_TEXTURE_2D:
   2190    case TGSI_TEXTURE_RECT:
   2191       num_offsets = 2;
   2192       num_derivs = 2;
   2193       break;
   2194    case TGSI_TEXTURE_SHADOW1D_ARRAY:
   2195       layer_coord = 1;
   2196       /* fallthrough */
   2197    case TGSI_TEXTURE_SHADOW1D:
   2198       shadow_coord = 2;
   2199       num_offsets = 1;
   2200       num_derivs = 1;
   2201       break;
   2202    case TGSI_TEXTURE_SHADOW2D_ARRAY:
   2203       layer_coord = 2;
   2204       shadow_coord = 3;
   2205       num_offsets = 2;
   2206       num_derivs = 2;
   2207       break;
   2208    case TGSI_TEXTURE_SHADOW2D:
   2209    case TGSI_TEXTURE_SHADOWRECT:
   2210       shadow_coord = 2;
   2211       num_offsets = 2;
   2212       num_derivs = 2;
   2213       break;
   2214    case TGSI_TEXTURE_CUBE:
   2215       num_offsets = 2;
   2216       num_derivs = 3;
   2217       break;
   2218    case TGSI_TEXTURE_3D:
   2219       num_offsets = 3;
   2220       num_derivs = 3;
   2221       break;
   2222    case TGSI_TEXTURE_SHADOWCUBE:
   2223       shadow_coord = 3;
   2224       num_offsets = 2;
   2225       num_derivs = 3;
   2226       break;
   2227    case TGSI_TEXTURE_CUBE_ARRAY:
   2228       num_offsets = 2;
   2229       num_derivs = 3;
   2230       layer_coord = 3;
   2231       break;
   2232    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
   2233       num_offsets = 2;
   2234       num_derivs = 3;
   2235       layer_coord = 3;
   2236       shadow_coord = 4; /* shadow coord special different reg */
   2237       break;
   2238    case TGSI_TEXTURE_2D_MSAA:
   2239    case TGSI_TEXTURE_2D_ARRAY_MSAA:
   2240    default:
   2241       assert(0);
   2242       return;
   2243    }
   2244 
   2245    /* Note lod and especially projected are illegal in a LOT of cases */
   2246    if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
   2247        modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
   2248       if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
   2249           inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
   2250          /* note that shadow cube array with bias/explicit lod does not exist */
   2251          lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
   2252       }
   2253       else {
   2254          lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
   2255       }
   2256       if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
   2257          sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
   2258       }
   2259       else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
   2260          sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
   2261       }
   2262       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   2263    }
   2264 
   2265    if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
   2266       oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
   2267       oow = lp_build_rcp(&bld->bld_base.base, oow);
   2268    }
   2269 
   2270    for (i = 0; i < num_derivs; i++) {
   2271       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   2272       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
   2273          coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
   2274    }
   2275    for (i = num_derivs; i < 5; i++) {
   2276       coords[i] = bld->bld_base.base.undef;
   2277    }
   2278 
   2279    /* Layer coord always goes into 3rd slot, except for cube map arrays */
   2280    if (layer_coord) {
   2281       if (layer_coord == 3) {
   2282          coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
   2283       }
   2284       else {
   2285          coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
   2286       }
   2287       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
   2288          coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
   2289    }
   2290    /* Shadow coord occupies always 5th slot. */
   2291    if (shadow_coord) {
   2292       sample_key |= LP_SAMPLER_SHADOW;
   2293       if (shadow_coord == 4) {
   2294          coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
   2295       }
   2296       else {
   2297          coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
   2298       }
   2299       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
   2300          coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
   2301    }
   2302 
   2303    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
   2304       unsigned dim;
   2305       sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
   2306       for (dim = 0; dim < num_derivs; ++dim) {
   2307          derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
   2308          derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
   2309       }
   2310       params.derivs = &derivs;
   2311       /*
   2312        * could also check all src regs if constant but I doubt such
   2313        * cases exist in practice.
   2314        */
   2315       if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
   2316          if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
   2317             lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   2318          }
   2319          else {
   2320             lod_property = LP_SAMPLER_LOD_PER_QUAD;
   2321          }
   2322       }
   2323       else {
   2324          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   2325       }
   2326    }
   2327    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
   2328 
   2329    /* we don't handle the 4 offset version of tg4 */
   2330    if (inst->Texture.NumOffsets == 1) {
   2331       unsigned dim;
   2332       sample_key |= LP_SAMPLER_OFFSETS;
   2333       for (dim = 0; dim < num_offsets; dim++) {
   2334          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
   2335       }
   2336    }
   2337 
   2338    params.type = bld->bld_base.base.type;
   2339    params.sample_key = sample_key;
   2340    params.texture_index = unit;
   2341    params.sampler_index = unit;
   2342    params.context_ptr = bld->context_ptr;
   2343    params.thread_data_ptr = bld->thread_data_ptr;
   2344    params.coords = coords;
   2345    params.offsets = offsets;
   2346    params.lod = lod;
   2347    params.texel = texel;
   2348 
   2349    bld->sampler->emit_tex_sample(bld->sampler,
   2350                                  bld->bld_base.base.gallivm,
   2351                                  &params);
   2352 }
   2353 
/**
 * Emit code for the SM4-style SAMPLE* opcodes, where the texture (sampler
 * view) and sampler state are given by separate source registers.
 *
 * Builds up the coordinate array, LOD, derivatives, offsets and the
 * sample_key describing the operation, then hands everything to the
 * sampler generator via emit_tex_sample().
 *
 * \param bld       the SoA TGSI translation context
 * \param inst      the full TGSI instruction
 * \param modifier  bias / explicit lod / lod-zero / explicit derivatives
 * \param compare   true for shadow-compare variants (SAMPLE_C, SAMPLE_C_LZ)
 * \param texel     receives the four result channels
 */
static void
emit_sample(struct lp_build_tgsi_soa_context *bld,
            const struct tgsi_full_instruction *inst,
            enum lp_build_tex_modifier modifier,
            boolean compare,
            LLVMValueRef *texel)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   unsigned texture_unit, sampler_unit;
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   unsigned num_offsets, num_derivs, i;
   unsigned layer_coord = 0;
   unsigned sample_key = LP_SAMPLER_OP_TEXTURE << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   /* Without a sampler generator we can only return undefs. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * unlike old-style tex opcodes the texture/sampler indices
    * always come from src1 and src2 respectively.
    */
   texture_unit = inst->Src[1].Register.Index;
   sampler_unit = inst->Src[2].Register.Index;

   /*
    * Note inst->Texture.Texture will contain the number of offsets,
    * however the target information is NOT there and comes from the
    * declared sampler views instead.
    */
   switch (bld->sv[texture_unit].Resource) {
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      layer_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* LOD bias / explicit LOD always comes from src3.x for these opcodes. */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
      /* XXX might be better to explicitly pass the level zero information */
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
   }

   /* Fetch the coordinates; unused slots are filled with undef. */
   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3)
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      else
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
   }
   /* Shadow coord occupies always 5th slot. */
   if (compare) {
      sample_key |= LP_SAMPLER_SHADOW;
      /* the reference value comes from src3.x for SAMPLE_C / SAMPLE_C_LZ */
      coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
   }

   /* Explicit derivatives (SAMPLE_D) live in src3 (ddx) and src4 (ddy). */
   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         /* non-fragment shaders have no quads, so LOD is per element */
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }

   /* some advanced gather instructions (txgo) would require 4 offsets */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   /* Package everything up for the sampler code generator. */
   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = texture_unit;
   params.sampler_index = sampler_unit;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   /* Apply the sampler-view swizzle from src1, if it is not identity. */
   if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
       inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
       inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
       inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
   2534 
/**
 * Emit code for unfiltered texel fetches (TXF / SAMPLE_I / SAMPLE_I_MS).
 *
 * Coordinates are integer texel addresses; there is no filtering and no
 * sampler state involved, so sampler_index is forced to 0.
 *
 * \param bld         the SoA TGSI translation context
 * \param inst        the full TGSI instruction
 * \param texel       receives the four result channels
 * \param is_samplei  true for SAMPLE_I[_MS]: target comes from the declared
 *                    sampler view and the src1 swizzle is applied to the
 *                    result; false for TXF where the target is encoded in
 *                    the instruction itself
 */
static void
emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                   const struct tgsi_full_instruction *inst,
                   LLVMValueRef *texel,
                   boolean is_samplei)
{
   unsigned unit, target;
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned dims, i;
   unsigned layer_coord = 0;
   unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   /* Without a sampler generator we can only return undefs. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = coord_undef;
      }
      return;
   }

   unit = inst->Src[1].Register.Index;

   /* SAMPLE_I gets the target from the sampler view declaration,
    * TXF carries it in the instruction's texture token.
    */
   if (is_samplei) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }

   switch (target) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_BUFFER:
      dims = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      dims = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_2D_MSAA:
      dims = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      layer_coord = 2;
      dims = 2;
      break;
   case TGSI_TEXTURE_3D:
      dims = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* always have lod except for buffers and msaa targets ? */
   if (target != TGSI_TEXTURE_BUFFER &&
       target != TGSI_TEXTURE_2D_MSAA &&
       target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   /*
    * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
    * would be the sample index.
    */

   for (i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
   for (i = dims; i < 5; i++) {
      coords[i] = coord_undef;
   }
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);

   /* Texel offsets (e.g. TXF with offset) go through the offsets array. */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < dims; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   /*
    * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
    * and trigger some assertions with d3d10 where the sampler view number
    * can exceed this.
    */
   params.sampler_index = 0;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.derivs = NULL;
   params.lod = explicit_lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   /* For SAMPLE_I, apply the sampler-view swizzle if it is not identity. */
   if (is_samplei &&
       (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
   2665 
/**
 * Emit code for texture size queries (TXQ / SVIEWINFO).
 *
 * \param bld           the SoA TGSI translation context
 * \param inst          the full TGSI instruction
 * \param sizes_out     receives the (integer) size channels
 * \param is_sviewinfo  true for SVIEWINFO: target comes from the declared
 *                      sampler view rather than the instruction token
 */
static void
emit_size_query( struct lp_build_tgsi_soa_context *bld,
                 const struct tgsi_full_instruction *inst,
                 LLVMValueRef *sizes_out,
                 boolean is_sviewinfo)
{
   LLVMValueRef explicit_lod;
   enum lp_sampler_lod_property lod_property;
   unsigned has_lod;
   unsigned i;
   unsigned unit = inst->Src[1].Register.Index;
   unsigned target, pipe_target;
   struct lp_sampler_size_query_params params;

   if (is_sviewinfo) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }
   /* Buffers and rect textures are not mipmapped, hence take no LOD arg. */
   switch (target) {
   case TGSI_TEXTURE_BUFFER:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
      has_lod = 0;
      break;
   default:
      has_lod = 1;
      break;
   }

   /* Without a sampler generator we can only return undefs. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++)
         sizes_out[i] = bld->bld_base.int_bld.undef;
      return;
   }

   if (has_lod) {
      /* LOD for size queries comes from src0.x. */
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else {
      explicit_lod = NULL;
      lod_property = LP_SAMPLER_LOD_SCALAR;
   }


   pipe_target = tgsi_to_pipe_tex_target(target);

   params.int_type = bld->bld_base.int_bld.type;
   params.texture_unit = unit;
   params.target = pipe_target;
   params.context_ptr = bld->context_ptr;
   params.is_sviewinfo = TRUE;  /* NOTE(review): always TRUE, even for TXQ; the is_sviewinfo argument is ignored here -- confirm this is intentional */
   params.lod_property = lod_property;
   params.explicit_lod = explicit_lod;
   params.sizes_out = sizes_out;

   bld->sampler->emit_size_query(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}
   2729 
   2730 static boolean
   2731 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
   2732                    int pc)
   2733 {
   2734    unsigned i;
   2735 
   2736    for (i = 0; i < 5; i++) {
   2737       unsigned opcode;
   2738 
   2739       if (pc + i >= bld->bld_base.info->num_instructions)
   2740          return TRUE;
   2741 
   2742       opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
   2743 
   2744       if (opcode == TGSI_OPCODE_END)
   2745          return TRUE;
   2746 
   2747       if (opcode == TGSI_OPCODE_TEX ||
   2748          opcode == TGSI_OPCODE_TXP ||
   2749          opcode == TGSI_OPCODE_TXD ||
   2750          opcode == TGSI_OPCODE_TXB ||
   2751          opcode == TGSI_OPCODE_TXL ||
   2752          opcode == TGSI_OPCODE_TXF ||
   2753          opcode == TGSI_OPCODE_TXQ ||
   2754          opcode == TGSI_OPCODE_TEX2 ||
   2755          opcode == TGSI_OPCODE_TXB2 ||
   2756          opcode == TGSI_OPCODE_TXL2 ||
   2757          opcode == TGSI_OPCODE_SAMPLE ||
   2758          opcode == TGSI_OPCODE_SAMPLE_B ||
   2759          opcode == TGSI_OPCODE_SAMPLE_C ||
   2760          opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
   2761          opcode == TGSI_OPCODE_SAMPLE_D ||
   2762          opcode == TGSI_OPCODE_SAMPLE_I ||
   2763          opcode == TGSI_OPCODE_SAMPLE_I_MS ||
   2764          opcode == TGSI_OPCODE_SAMPLE_L ||
   2765          opcode == TGSI_OPCODE_SVIEWINFO ||
   2766          opcode == TGSI_OPCODE_CAL ||
   2767          opcode == TGSI_OPCODE_CALLNZ ||
   2768          opcode == TGSI_OPCODE_IF ||
   2769          opcode == TGSI_OPCODE_UIF ||
   2770          opcode == TGSI_OPCODE_BGNLOOP ||
   2771          opcode == TGSI_OPCODE_SWITCH)
   2772          return FALSE;
   2773    }
   2774 
   2775    return TRUE;
   2776 }
   2777 
   2778 
   2779 
   2780 /**
   2781  * Kill fragment if any of the src register values are negative.
   2782  */
/**
 * Kill fragment if any of the src register values are negative.
 */
static void
emit_kill_if(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   int pc)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[TGSI_NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   /* Gather one fetched value per distinct source component;
    * duplicate swizzle selections are fetched/tested only once.
    */
   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < TGSI_NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
   }

   /* AND together the per-channel "keep" masks. */
   mask = NULL;
   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);

         if(mask)
            mask = LLVMBuildAnd(builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   /* Fragments already disabled by the execution mask must not be
    * (re-)killed here, so OR in the inverted exec mask.
    */
   if (bld->exec_mask.has_mask) {
      LLVMValueRef invmask;
      invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
      mask = LLVMBuildOr(builder, mask, invmask, "");
   }

   /* Skip the early-exit mask check when the shader is about to end anyway. */
   lp_build_mask_update(bld->mask, mask);
   if (!near_end_of_shader(bld, pc))
      lp_build_mask_check(bld->mask);
}
   2837 
   2838 
   2839 /**
   2840  * Unconditional fragment kill.
   2841  * The only predication is the execution mask which will apply if
   2842  * we're inside a loop or conditional.
   2843  */
   2844 static void
   2845 emit_kill(struct lp_build_tgsi_soa_context *bld,
   2846           int pc)
   2847 {
   2848    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   2849    LLVMValueRef mask;
   2850 
   2851    /* For those channels which are "alive", disable fragment shader
   2852     * execution.
   2853     */
   2854    if (bld->exec_mask.has_mask) {
   2855       mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
   2856    }
   2857    else {
   2858       LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
   2859       mask = zero;
   2860    }
   2861 
   2862    lp_build_mask_update(bld->mask, mask);
   2863 
   2864    if (!near_end_of_shader(bld, pc))
   2865       lp_build_mask_check(bld->mask);
   2866 }
   2867 
   2868 
   2869 /**
   2870  * Emit code which will dump the value of all the temporary registers
   2871  * to stdout.
   2872  */
/**
 * Emit code which will dump the value of all the temporary registers
 * to stdout.
 */
static void
emit_dump_file(struct lp_build_tgsi_soa_context *bld,
               unsigned file)
{
   const struct tgsi_shader_info *info = bld->bld_base.info;
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef reg_ptr;
   int index;
   int max_index = info->file_max[file];

   /*
    * Some register files, particularly constants, can be very large,
    * and dumping everything could make this unusably slow.
    */
   max_index = MIN2(max_index, 32);

   for (index = 0; index <= max_index; index++) {
      LLVMValueRef res;
      unsigned mask;
      int chan;

      /* file_mask only covers the first 8*sizeof(unsigned) registers;
       * higher indices cannot be checked and are dumped unconditionally.
       */
      if (index < 8 * sizeof(unsigned) &&
          (info->file_mask[file] & (1u << index)) == 0)  {
         /* This was not declared.*/
         continue;
      }

      /* Only inputs track per-channel usage; dump all 4 channels otherwise. */
      if (file == TGSI_FILE_INPUT) {
         mask = info->input_usage_mask[index];
      } else {
         mask = TGSI_WRITEMASK_XYZW;
      }

      for (chan = 0; chan < 4; chan++) {
         if ((mask & (1 << chan)) == 0) {
            /* This channel is not used.*/
            continue;
         }

         if (file == TGSI_FILE_CONSTANT) {
            /* Constants go through the generic fetch callback, which needs
             * a synthetic source register with an identity swizzle.
             */
            struct tgsi_full_src_register reg;
            memset(&reg, 0, sizeof reg);
            reg.Register.File = file;
            reg.Register.Index = index;
            reg.Register.SwizzleX = 0;
            reg.Register.SwizzleY = 1;
            reg.Register.SwizzleZ = 2;
            reg.Register.SwizzleW = 3;

            res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_INPUT) {
            res = bld->inputs[index][chan];
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_TEMPORARY) {
            /* Temps and outputs are allocas; load the current value. */
            reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else if (file == TGSI_FILE_OUTPUT) {
            reg_ptr = lp_get_output_ptr(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else {
            assert(0);
            continue;
         }

         emit_dump_reg(gallivm, file, index, chan, res);
      }
   }
}
   2949 
   2950 
   2951 
/**
 * Handle a TGSI declaration token: allocate storage (allocas) or record
 * metadata for the declared register range, depending on the file.
 *
 * Registers of indirectly-addressed files are not allocated here; those
 * use array storage set up elsewhere.
 */
void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_declaration *decl)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned idx, i;

   assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);

   switch (decl->Declaration.File) {
   case TGSI_FILE_TEMPORARY:
      /* One alloca per channel, unless temps are indirectly addressed. */
      if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
         assert(last < LP_MAX_INLINED_TEMPS);
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
         }
      }
      break;

   case TGSI_FILE_OUTPUT:
      if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->outputs[idx][i] = lp_build_alloca(gallivm,
                                                      vec_type, "output");
         }
      }
      break;

   case TGSI_FILE_ADDRESS:
      /* ADDR registers are only allocated with an integer LLVM IR type,
       * as they are guaranteed to always have integers.
       * XXX: Not sure if this exception is worthwhile (or the whole idea of
       * an ADDR register for that matter).
       */
      assert(last < LP_MAX_TGSI_ADDRS);
      for (idx = first; idx <= last; ++idx) {
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
      }
      break;

   case TGSI_FILE_PREDICATE:
      assert(last < LP_MAX_TGSI_PREDS);
      for (idx = first; idx <= last; ++idx) {
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
                                                 "predicate");
      }
      break;

   case TGSI_FILE_SAMPLER_VIEW:
      /*
       * The target stored here MUST match whatever there actually
       * is in the set sampler views (what about return type?).
       */
      assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
      for (idx = first; idx <= last; ++idx) {
         bld->sv[idx] = decl->SamplerView;
      }
      break;

   case TGSI_FILE_CONSTANT:
   {
      /*
       * We could trivially fetch the per-buffer pointer when fetching the
       * constant, relying on llvm to figure out it's always the same pointer
       * anyway. However, doing so results in a huge (more than factor of 10)
       * slowdown in llvm compilation times for some (but not all) shaders
       * (more specifically, the IR optimization spends way more time in
       * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
       */
      unsigned idx2D = decl->Dim.Index2D;
      LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
      assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
      /* Cache the buffer base pointer and its size for this 2D slot. */
      bld->consts[idx2D] =
         lp_build_array_get(gallivm, bld->consts_ptr, index2D);
      bld->consts_sizes[idx2D] =
         lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
   }
      break;

   default:
      /* don't need to declare other vars */
      break;
   }
}
   3046 
   3047 
   3048 void lp_emit_immediate_soa(
   3049    struct lp_build_tgsi_context *bld_base,
   3050    const struct tgsi_full_immediate *imm)
   3051 {
   3052    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   3053    struct gallivm_state * gallivm = bld_base->base.gallivm;
   3054    LLVMValueRef imms[4];
   3055    unsigned i;
   3056    const uint size = imm->Immediate.NrTokens - 1;
   3057    assert(size <= 4);
   3058    switch (imm->Immediate.DataType) {
   3059    case TGSI_IMM_FLOAT32:
   3060       for( i = 0; i < size; ++i )
   3061          imms[i] =
   3062                lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
   3063 
   3064       break;
   3065    case TGSI_IMM_FLOAT64:
   3066    case TGSI_IMM_UINT64:
   3067    case TGSI_IMM_INT64:
   3068    case TGSI_IMM_UINT32:
   3069       for( i = 0; i < size; ++i ) {
   3070          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
   3071          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
   3072       }
   3073 
   3074       break;
   3075    case TGSI_IMM_INT32:
   3076       for( i = 0; i < size; ++i ) {
   3077          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
   3078          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
   3079       }
   3080 
   3081       break;
   3082    }
   3083    for( i = size; i < 4; ++i )
   3084       imms[i] = bld_base->base.undef;
   3085 
   3086    if (bld->use_immediates_array) {
   3087       unsigned index = bld->num_immediates;
   3088       struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   3089       LLVMBuilderRef builder = gallivm->builder;
   3090 
   3091       assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
   3092       for (i = 0; i < 4; ++i ) {
   3093          LLVMValueRef lindex = lp_build_const_int32(
   3094                   bld->bld_base.base.gallivm, index * 4 + i);
   3095          LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
   3096                                              bld->imms_array, &lindex, 1, "");
   3097          LLVMBuildStore(builder, imms[i], imm_ptr);
   3098       }
   3099    } else {
   3100       /* simply copy the immediate values into the next immediates[] slot */
   3101       unsigned i;
   3102       assert(imm->Immediate.NrTokens - 1 <= 4);
   3103       assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
   3104 
   3105       for(i = 0; i < 4; ++i )
   3106          bld->immediates[bld->num_immediates][i] = imms[i];
   3107 
   3108       if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
   3109          unsigned index = bld->num_immediates;
   3110          struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   3111          LLVMBuilderRef builder = gallivm->builder;
   3112          for (i = 0; i < 4; ++i ) {
   3113             LLVMValueRef lindex = lp_build_const_int32(
   3114                      bld->bld_base.base.gallivm, index * 4 + i);
   3115             LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
   3116                                                 bld->imms_array, &lindex, 1, "");
   3117             LLVMBuildStore(builder,
   3118                            bld->immediates[index][i],
   3119                            imm_ptr);
   3120          }
   3121       }
   3122    }
   3123 
   3124    bld->num_immediates++;
   3125 }
   3126 
   3127 static void
   3128 ddx_emit(
   3129    const struct lp_build_tgsi_action * action,
   3130    struct lp_build_tgsi_context * bld_base,
   3131    struct lp_build_emit_data * emit_data)
   3132 {
   3133    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3134 
   3135    emit_fetch_deriv(bld, emit_data->args[0], NULL,
   3136                     &emit_data->output[emit_data->chan], NULL);
   3137 }
   3138 
   3139 static void
   3140 ddy_emit(
   3141    const struct lp_build_tgsi_action * action,
   3142    struct lp_build_tgsi_context * bld_base,
   3143    struct lp_build_emit_data * emit_data)
   3144 {
   3145    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3146 
   3147    emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
   3148                     &emit_data->output[emit_data->chan]);
   3149 }
   3150 
   3151 static void
   3152 kill_emit(
   3153    const struct lp_build_tgsi_action * action,
   3154    struct lp_build_tgsi_context * bld_base,
   3155    struct lp_build_emit_data * emit_data)
   3156 {
   3157    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3158 
   3159    emit_kill(bld, bld_base->pc - 1);
   3160 }
   3161 
   3162 static void
   3163 kill_if_emit(
   3164    const struct lp_build_tgsi_action * action,
   3165    struct lp_build_tgsi_context * bld_base,
   3166    struct lp_build_emit_data * emit_data)
   3167 {
   3168    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3169 
   3170    emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
   3171 }
   3172 
   3173 static void
   3174 tex_emit(
   3175    const struct lp_build_tgsi_action * action,
   3176    struct lp_build_tgsi_context * bld_base,
   3177    struct lp_build_emit_data * emit_data)
   3178 {
   3179    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3180 
   3181    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
   3182             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
   3183 }
   3184 
   3185 static void
   3186 tex2_emit(
   3187    const struct lp_build_tgsi_action * action,
   3188    struct lp_build_tgsi_context * bld_base,
   3189    struct lp_build_emit_data * emit_data)
   3190 {
   3191    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3192 
   3193    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
   3194             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
   3195 }
   3196 
   3197 static void
   3198 txb_emit(
   3199    const struct lp_build_tgsi_action * action,
   3200    struct lp_build_tgsi_context * bld_base,
   3201    struct lp_build_emit_data * emit_data)
   3202 {
   3203    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3204 
   3205    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
   3206             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
   3207 }
   3208 
   3209 static void
   3210 txb2_emit(
   3211    const struct lp_build_tgsi_action * action,
   3212    struct lp_build_tgsi_context * bld_base,
   3213    struct lp_build_emit_data * emit_data)
   3214 {
   3215    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3216 
   3217    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
   3218             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
   3219 }
   3220 
   3221 static void
   3222 txd_emit(
   3223    const struct lp_build_tgsi_action * action,
   3224    struct lp_build_tgsi_context * bld_base,
   3225    struct lp_build_emit_data * emit_data)
   3226 {
   3227    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3228 
   3229    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
   3230             emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
   3231 }
   3232 
   3233 static void
   3234 txl_emit(
   3235    const struct lp_build_tgsi_action * action,
   3236    struct lp_build_tgsi_context * bld_base,
   3237    struct lp_build_emit_data * emit_data)
   3238 {
   3239    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3240 
   3241    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
   3242             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
   3243 }
   3244 
   3245 static void
   3246 txl2_emit(
   3247    const struct lp_build_tgsi_action * action,
   3248    struct lp_build_tgsi_context * bld_base,
   3249    struct lp_build_emit_data * emit_data)
   3250 {
   3251    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3252 
   3253    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
   3254             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
   3255 }
   3256 
   3257 static void
   3258 txp_emit(
   3259    const struct lp_build_tgsi_action * action,
   3260    struct lp_build_tgsi_context * bld_base,
   3261    struct lp_build_emit_data * emit_data)
   3262 {
   3263    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3264 
   3265    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
   3266             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
   3267 }
   3268 
   3269 static void
   3270 tg4_emit(
   3271    const struct lp_build_tgsi_action * action,
   3272    struct lp_build_tgsi_context * bld_base,
   3273    struct lp_build_emit_data * emit_data)
   3274 {
   3275    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3276 
   3277    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
   3278             emit_data->output, 2, LP_SAMPLER_OP_GATHER);
   3279 }
   3280 
   3281 static void
   3282 txq_emit(
   3283    const struct lp_build_tgsi_action * action,
   3284    struct lp_build_tgsi_context * bld_base,
   3285    struct lp_build_emit_data * emit_data)
   3286 {
   3287    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3288 
   3289    emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
   3290 }
   3291 
   3292 static void
   3293 txf_emit(
   3294    const struct lp_build_tgsi_action * action,
   3295    struct lp_build_tgsi_context * bld_base,
   3296    struct lp_build_emit_data * emit_data)
   3297 {
   3298    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3299 
   3300    emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
   3301 }
   3302 
   3303 static void
   3304 sample_i_emit(
   3305    const struct lp_build_tgsi_action * action,
   3306    struct lp_build_tgsi_context * bld_base,
   3307    struct lp_build_emit_data * emit_data)
   3308 {
   3309    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3310 
   3311    emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
   3312 }
   3313 
   3314 static void
   3315 sample_emit(
   3316    const struct lp_build_tgsi_action * action,
   3317    struct lp_build_tgsi_context * bld_base,
   3318    struct lp_build_emit_data * emit_data)
   3319 {
   3320    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3321 
   3322    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
   3323                FALSE, emit_data->output);
   3324 }
   3325 
   3326 static void
   3327 sample_b_emit(
   3328    const struct lp_build_tgsi_action * action,
   3329    struct lp_build_tgsi_context * bld_base,
   3330    struct lp_build_emit_data * emit_data)
   3331 {
   3332    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3333 
   3334    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
   3335                FALSE, emit_data->output);
   3336 }
   3337 
   3338 static void
   3339 sample_c_emit(
   3340    const struct lp_build_tgsi_action * action,
   3341    struct lp_build_tgsi_context * bld_base,
   3342    struct lp_build_emit_data * emit_data)
   3343 {
   3344    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3345 
   3346    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
   3347                TRUE, emit_data->output);
   3348 }
   3349 
   3350 static void
   3351 sample_c_lz_emit(
   3352    const struct lp_build_tgsi_action * action,
   3353    struct lp_build_tgsi_context * bld_base,
   3354    struct lp_build_emit_data * emit_data)
   3355 {
   3356    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3357 
   3358    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
   3359                TRUE, emit_data->output);
   3360 }
   3361 
   3362 static void
   3363 sample_d_emit(
   3364    const struct lp_build_tgsi_action * action,
   3365    struct lp_build_tgsi_context * bld_base,
   3366    struct lp_build_emit_data * emit_data)
   3367 {
   3368    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3369 
   3370    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
   3371                FALSE, emit_data->output);
   3372 }
   3373 
   3374 static void
   3375 sample_l_emit(
   3376    const struct lp_build_tgsi_action * action,
   3377    struct lp_build_tgsi_context * bld_base,
   3378    struct lp_build_emit_data * emit_data)
   3379 {
   3380    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3381 
   3382    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
   3383                FALSE, emit_data->output);
   3384 }
   3385 
   3386 static void
   3387 sviewinfo_emit(
   3388    const struct lp_build_tgsi_action * action,
   3389    struct lp_build_tgsi_context * bld_base,
   3390    struct lp_build_emit_data * emit_data)
   3391 {
   3392    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3393 
   3394    emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
   3395 }
   3396 
   3397 static LLVMValueRef
   3398 mask_vec(struct lp_build_tgsi_context *bld_base)
   3399 {
   3400    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3401    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   3402    struct lp_exec_mask *exec_mask = &bld->exec_mask;
   3403 
   3404    if (!exec_mask->has_mask) {
   3405       return lp_build_mask_value(bld->mask);
   3406    }
   3407    return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
   3408                        exec_mask->exec_mask, "");
   3409 }
   3410 
   3411 static void
   3412 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
   3413                           LLVMValueRef ptr,
   3414                           LLVMValueRef mask)
   3415 {
   3416    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   3417    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
   3418 
   3419    current_vec = LLVMBuildSub(builder, current_vec, mask, "");
   3420 
   3421    LLVMBuildStore(builder, current_vec, ptr);
   3422 }
   3423 
   3424 static void
   3425 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
   3426                              LLVMValueRef ptr,
   3427                              LLVMValueRef mask)
   3428 {
   3429    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   3430    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
   3431 
   3432    current_vec = lp_build_select(&bld_base->uint_bld,
   3433                                  mask,
   3434                                  bld_base->uint_bld.zero,
   3435                                  current_vec);
   3436 
   3437    LLVMBuildStore(builder, current_vec, ptr);
   3438 }
   3439 
   3440 static LLVMValueRef
   3441 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
   3442                                   LLVMValueRef current_mask_vec,
   3443                                   LLVMValueRef total_emitted_vertices_vec)
   3444 {
   3445    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   3446    struct lp_build_context *int_bld = &bld->bld_base.int_bld;
   3447    LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
   3448                                         total_emitted_vertices_vec,
   3449                                         bld->max_output_vertices_vec);
   3450 
   3451    return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
   3452 }
   3453 
/* EMIT opcode for geometry shaders: hand the gathered outputs for the
 * current vertex to the GS interface and bump the per-lane vertex
 * counters for every active lane.
 */
static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      /* Start from the combined execution mask, then disable lanes that
       * already reached the declared maximum output vertex count. */
      LLVMValueRef mask = mask_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      mask = clamp_mask_to_max_output_vertices(bld, mask,
                                               total_emitted_vertices_vec);
      /* Collect the output registers into bld->outputs before handing
       * them to the interface. */
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                 bld->outputs,
                                 total_emitted_vertices_vec);
      /* Bump both the per-primitive and the total vertex counters for
       * the lanes that actually emitted. */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                mask);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}
   3487 
   3488 
/* Finish the current primitive for the lanes selected by 'mask': notify
 * the GS interface, count the primitive, and reset the per-primitive
 * vertex counter.  Called both from the ENDPRIM opcode and from the
 * epilogue (which passes its own mask, since the exec mask is invalid
 * there).
 */
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      /* Lanes with a zero vertex count have nothing pending. */
      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* We need to combine the current execution mask with the mask
         telling us which, if any, execution slots actually have
         unemitted primitives, this way we make sure that end_primitives
         executes only on the paths that have unflushed vertices */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
                                   emitted_vertices_vec,
                                   emitted_prims_vec);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      /* Count the finished primitive, then zero the per-primitive vertex
       * counter for those lanes so the next primitive starts fresh. */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}
   3541 
   3542 static void
   3543 end_primitive(
   3544    const struct lp_build_tgsi_action * action,
   3545    struct lp_build_tgsi_context * bld_base,
   3546    struct lp_build_emit_data * emit_data)
   3547 {
   3548    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3549 
   3550    if (bld->gs_iface->end_primitive) {
   3551       LLVMValueRef mask = mask_vec(bld_base);
   3552       end_primitive_masked(bld_base, mask);
   3553    }
   3554 }
   3555 
   3556 static void
   3557 cal_emit(
   3558    const struct lp_build_tgsi_action * action,
   3559    struct lp_build_tgsi_context * bld_base,
   3560    struct lp_build_emit_data * emit_data)
   3561 {
   3562    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3563 
   3564    lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
   3565                      &bld_base->pc);
   3566 }
   3567 
   3568 static void
   3569 ret_emit(
   3570    const struct lp_build_tgsi_action * action,
   3571    struct lp_build_tgsi_context * bld_base,
   3572    struct lp_build_emit_data * emit_data)
   3573 {
   3574    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3575 
   3576    lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
   3577 }
   3578 
   3579 static void
   3580 brk_emit(
   3581    const struct lp_build_tgsi_action * action,
   3582    struct lp_build_tgsi_context * bld_base,
   3583    struct lp_build_emit_data * emit_data)
   3584 {
   3585    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3586 
   3587    lp_exec_break(&bld->exec_mask, bld_base);
   3588 }
   3589 
   3590 static void
   3591 breakc_emit(
   3592    const struct lp_build_tgsi_action * action,
   3593    struct lp_build_tgsi_context * bld_base,
   3594    struct lp_build_emit_data * emit_data)
   3595 {
   3596    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3597    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   3598    struct lp_build_context *uint_bld = &bld_base->uint_bld;
   3599    LLVMValueRef unsigned_cond =
   3600       LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
   3601    LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
   3602                                     unsigned_cond,
   3603                                     uint_bld->zero);
   3604 
   3605    lp_exec_break_condition(&bld->exec_mask, cond);
   3606 }
   3607 
   3608 static void
   3609 if_emit(
   3610    const struct lp_build_tgsi_action * action,
   3611    struct lp_build_tgsi_context * bld_base,
   3612    struct lp_build_emit_data * emit_data)
   3613 {
   3614    LLVMValueRef tmp;
   3615    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3616 
   3617    tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
   3618                       emit_data->args[0], bld->bld_base.base.zero);
   3619    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
   3620 }
   3621 
   3622 static void
   3623 uif_emit(
   3624    const struct lp_build_tgsi_action * action,
   3625    struct lp_build_tgsi_context * bld_base,
   3626    struct lp_build_emit_data * emit_data)
   3627 {
   3628    LLVMValueRef tmp;
   3629    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3630    struct lp_build_context *uint_bld = &bld_base->uint_bld;
   3631 
   3632    tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
   3633                       emit_data->args[0], uint_bld->zero);
   3634    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
   3635 }
   3636 
   3637 static void
   3638 case_emit(
   3639    const struct lp_build_tgsi_action * action,
   3640    struct lp_build_tgsi_context * bld_base,
   3641    struct lp_build_emit_data * emit_data)
   3642 {
   3643    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3644 
   3645    lp_exec_case(&bld->exec_mask, emit_data->args[0]);
   3646 }
   3647 
   3648 static void
   3649 default_emit(
   3650    const struct lp_build_tgsi_action * action,
   3651    struct lp_build_tgsi_context * bld_base,
   3652    struct lp_build_emit_data * emit_data)
   3653 {
   3654    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3655 
   3656    lp_exec_default(&bld->exec_mask, bld_base);
   3657 }
   3658 
   3659 static void
   3660 switch_emit(
   3661    const struct lp_build_tgsi_action * action,
   3662    struct lp_build_tgsi_context * bld_base,
   3663    struct lp_build_emit_data * emit_data)
   3664 {
   3665    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3666 
   3667    lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
   3668 }
   3669 
   3670 static void
   3671 endswitch_emit(
   3672    const struct lp_build_tgsi_action * action,
   3673    struct lp_build_tgsi_context * bld_base,
   3674    struct lp_build_emit_data * emit_data)
   3675 {
   3676    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3677 
   3678    lp_exec_endswitch(&bld->exec_mask, bld_base);
   3679 }
   3680 
   3681 static void
   3682 bgnloop_emit(
   3683    const struct lp_build_tgsi_action * action,
   3684    struct lp_build_tgsi_context * bld_base,
   3685    struct lp_build_emit_data * emit_data)
   3686 {
   3687    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3688 
   3689    lp_exec_bgnloop(&bld->exec_mask);
   3690 }
   3691 
   3692 static void
   3693 bgnsub_emit(
   3694    const struct lp_build_tgsi_action * action,
   3695    struct lp_build_tgsi_context * bld_base,
   3696    struct lp_build_emit_data * emit_data)
   3697 {
   3698    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3699 
   3700    lp_exec_mask_bgnsub(&bld->exec_mask);
   3701 }
   3702 
   3703 static void
   3704 else_emit(
   3705    const struct lp_build_tgsi_action * action,
   3706    struct lp_build_tgsi_context * bld_base,
   3707    struct lp_build_emit_data * emit_data)
   3708 {
   3709    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3710 
   3711    lp_exec_mask_cond_invert(&bld->exec_mask);
   3712 }
   3713 
   3714 static void
   3715 endif_emit(
   3716    const struct lp_build_tgsi_action * action,
   3717    struct lp_build_tgsi_context * bld_base,
   3718    struct lp_build_emit_data * emit_data)
   3719 {
   3720    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3721 
   3722    lp_exec_mask_cond_pop(&bld->exec_mask);
   3723 }
   3724 
   3725 static void
   3726 endloop_emit(
   3727    const struct lp_build_tgsi_action * action,
   3728    struct lp_build_tgsi_context * bld_base,
   3729    struct lp_build_emit_data * emit_data)
   3730 {
   3731    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3732 
   3733    lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
   3734 }
   3735 
   3736 static void
   3737 endsub_emit(
   3738    const struct lp_build_tgsi_action * action,
   3739    struct lp_build_tgsi_context * bld_base,
   3740    struct lp_build_emit_data * emit_data)
   3741 {
   3742    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3743 
   3744    lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
   3745 }
   3746 
   3747 static void
   3748 cont_emit(
   3749    const struct lp_build_tgsi_action * action,
   3750    struct lp_build_tgsi_context * bld_base,
   3751    struct lp_build_emit_data * emit_data)
   3752 {
   3753    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   3754 
   3755    lp_exec_continue(&bld->exec_mask);
   3756 }
   3757 
/* Set up per-invocation storage before the shader body is translated:
 * alloca-backed arrays for every register file accessed with indirect
 * addressing, plus the GS emit counters.  Array sizes are
 * (file_max * 4 + 4) vectors: 4 channels per register, file_max being
 * the highest register index (inclusive).
 */
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   /* Indirectly addressed temporaries must live in memory. */
   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                         bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
      bld->temps_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "temp_array");
   }

   /* Same for indirectly addressed outputs. */
   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                            bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                bld_base->base.vec_type, array_size,
                                                "output_array");
   }

   /* Same for indirectly addressed immediates; values are stored here by
    * lp_emit_immediate_soa(). */
   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                         bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
      bld->imms_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                               vec_type, array_size,
                                               "input_array");

      assert(bld_base->info->num_inputs
                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      /* Copy each caller-provided input channel (if present) into the
       * alloca array at slot index*4 + chan. */
      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   /* Geometry shaders track per-lane vertex/primitive counts in allocas,
    * all starting at zero. */
   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   /* Optionally emit IR that dumps the shader's inputs at runtime. */
   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}
   3848 
/* Finish code generation for the shader body: optionally dump outputs,
 * flush any pending GS primitive, and hand results back to the caller.
 */
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   /* If we have indirect addressing in outputs we need to copy our alloca array
    * to the outputs slots specified by the caller */
   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* implicit end_primitives, needed in case there are any unflushed
         vertices in the cache. Note must not call end_primitive here
         since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      /* Report the final per-lane vertex/primitive counts to the GS
       * interface. */
      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      /* Non-GS shaders: copy output registers back to the caller's
       * outputs[] slots. */
      gather_outputs(bld);
   }
}
   3886 
/**
 * Translate a TGSI token stream into LLVM IR using an SoA
 * (structure-of-arrays) register layout.
 *
 * Sets up all the per-type build contexts, wires fetch/store callbacks and
 * per-opcode emit actions into \p bld_base, then runs the generic TGSI
 * walker (lp_build_tgsi_llvm) which invokes emit_prologue/emit_epilogue
 * around the instruction stream.
 *
 * \param gallivm          JIT compilation state (builder, module, context).
 * \param tokens           TGSI shader to translate.
 * \param type             SIMD vector type for the main (float) channels.
 * \param mask             execution mask context, or may be used by kill/GS
 *                         paths -- stored into the soa context as-is.
 * \param consts_ptr       pointer to the constant buffers.
 * \param const_sizes_ptr  per-buffer sizes for bounds handling.
 * \param system_values    copied by value into the soa context below.
 * \param inputs/outputs   caller-owned [attrib][channel] value arrays.
 * \param context_ptr      opaque per-draw context pointer for callbacks.
 * \param thread_data_ptr  opaque per-thread data pointer.
 * \param sampler          texture sampling code generator.
 * \param info             shader info (indirect files, property values).
 * \param gs_iface         non-NULL when building a geometry shader.
 */
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  LLVMValueRef const_sizes_ptr,
                  const struct lp_bld_tgsi_system_values *system_values,
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef context_ptr,
                  LLVMValueRef thread_data_ptr,
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface)
{
   struct lp_build_tgsi_soa_context bld;

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   /* Signed type with the same width/length as the main type.
    * NOTE(review): res_type is not referenced again in this function --
    * presumably kept for historical reasons; confirm before removing. */
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   /* Double-width float context (e.g. doubles when type is 32-bit float). */
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   /* Double-width unsigned integer context. */
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   /* Double-width signed integer context. */
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   bld.mask = mask;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.const_sizes_ptr = const_sizes_ptr;
   bld.sampler = sampler;
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;
   bld.context_ptr = context_ptr;
   bld.thread_data_ptr = thread_data_ptr;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * like indirect temporaries.
    */
   if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reason immediates are always backed in a static
    * array, but if their number is too great, we have to use just
    * a dynamically allocated array.
    */
   bld.use_immediates_array =
         (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }


   /* Register fetch/store callbacks used by the generic TGSI walker. */
   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   /* Override the defaults for control flow, texturing and kill opcodes
    * with the SoA-specific implementations in this file. */
   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   /* multisample fetch shares the plain fetch implementation */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;

   if (gs_iface) {
      /* There's no specific value for this because it should always
       * be set, but apps using ext_geometry_shader4 quite often
       * were forgetting so we're using MAX_VERTEX_VARYING from
       * that spec even though we could debug_assert if it's not
       * set, but that's a lot uglier. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
            info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32;

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   /* copied by value; caller's struct need not outlive this call */
   bld.system_values = *system_values;

   /* Walk the token stream and emit the IR. */
   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   /* Dead debug code: flip to if (1) to dump TGSI plus generated IR. */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   /* Dead debug code: flip to if (1) to dump the whole LLVM module. */
   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);

   }
   lp_exec_mask_fini(&bld.exec_mask);
}
   4077