Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2009 VMware, Inc.
      4  * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
      5  * All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sub license, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * The above copyright notice and this permission notice (including the
     16  * next paragraph) shall be included in all copies or substantial portions
     17  * of the Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
     23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     26  *
     27  **************************************************************************/
     28 
     29 /**
     30  * @file
     31  * TGSI to LLVM IR translation -- SoA.
     32  *
     33  * @author Jose Fonseca <jfonseca (at) vmware.com>
     34  *
     35  * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
     36  * Brian Paul, and others.
     37  */
     38 
     39 #include "pipe/p_config.h"
     40 #include "pipe/p_shader_tokens.h"
     41 #include "util/u_debug.h"
     42 #include "util/u_math.h"
     43 #include "util/u_memory.h"
     44 #include "tgsi/tgsi_dump.h"
     45 #include "tgsi/tgsi_exec.h"
     46 #include "tgsi/tgsi_info.h"
     47 #include "tgsi/tgsi_parse.h"
     48 #include "tgsi/tgsi_util.h"
     49 #include "tgsi/tgsi_scan.h"
     50 #include "lp_bld_tgsi_action.h"
     51 #include "lp_bld_type.h"
     52 #include "lp_bld_const.h"
     53 #include "lp_bld_arit.h"
     54 #include "lp_bld_bitarit.h"
     55 #include "lp_bld_gather.h"
     56 #include "lp_bld_init.h"
     57 #include "lp_bld_logic.h"
     58 #include "lp_bld_swizzle.h"
     59 #include "lp_bld_flow.h"
     60 #include "lp_bld_quad.h"
     61 #include "lp_bld_tgsi.h"
     62 #include "lp_bld_limits.h"
     63 #include "lp_bld_debug.h"
     64 #include "lp_bld_printf.h"
     65 #include "lp_bld_sample.h"
     66 
     67 
     68 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
     69 {
     70    LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
     71    LLVMBuilderRef builder = bld->gallivm->builder;
     72 
     73    mask->bld = bld;
     74    mask->has_mask = FALSE;
     75    mask->cond_stack_size = 0;
     76    mask->loop_stack_size = 0;
     77    mask->call_stack_size = 0;
     78 
     79    mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
     80    mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
     81          LLVMConstAllOnes(mask->int_vec_type);
     82 
     83    mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");
     84 
     85    LLVMBuildStore(
     86       builder,
     87       LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
     88       mask->loop_limiter);
     89 }
     90 
     91 static void lp_exec_mask_update(struct lp_exec_mask *mask)
     92 {
     93    LLVMBuilderRef builder = mask->bld->gallivm->builder;
     94 
     95    if (mask->loop_stack_size) {
     96       /*for loops we need to update the entire mask at runtime */
     97       LLVMValueRef tmp;
     98       assert(mask->break_mask);
     99       tmp = LLVMBuildAnd(builder,
    100                          mask->cont_mask,
    101                          mask->break_mask,
    102                          "maskcb");
    103       mask->exec_mask = LLVMBuildAnd(builder,
    104                                      mask->cond_mask,
    105                                      tmp,
    106                                      "maskfull");
    107    } else
    108       mask->exec_mask = mask->cond_mask;
    109 
    110    if (mask->call_stack_size) {
    111       mask->exec_mask = LLVMBuildAnd(builder,
    112                                      mask->exec_mask,
    113                                      mask->ret_mask,
    114                                      "callmask");
    115    }
    116 
    117    mask->has_mask = (mask->cond_stack_size > 0 ||
    118                      mask->loop_stack_size > 0 ||
    119                      mask->call_stack_size > 0);
    120 }
    121 
    122 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
    123                                    LLVMValueRef val)
    124 {
    125    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    126 
    127    assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
    128    if (mask->cond_stack_size == 0) {
    129       assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
    130    }
    131    mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
    132    assert(LLVMTypeOf(val) == mask->int_vec_type);
    133    mask->cond_mask = LLVMBuildAnd(builder,
    134                                   mask->cond_mask,
    135                                   val,
    136                                   "");
    137    lp_exec_mask_update(mask);
    138 }
    139 
    140 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
    141 {
    142    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    143    LLVMValueRef prev_mask;
    144    LLVMValueRef inv_mask;
    145 
    146    assert(mask->cond_stack_size);
    147    prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
    148    if (mask->cond_stack_size == 1) {
    149       assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
    150    }
    151 
    152    inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
    153 
    154    mask->cond_mask = LLVMBuildAnd(builder,
    155                                   inv_mask,
    156                                   prev_mask, "");
    157    lp_exec_mask_update(mask);
    158 }
    159 
    160 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
    161 {
    162    assert(mask->cond_stack_size);
    163    mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
    164    lp_exec_mask_update(mask);
    165 }
    166 
    167 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
    168 {
    169    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    170 
    171    if (mask->loop_stack_size == 0) {
    172       assert(mask->loop_block == NULL);
    173       assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
    174       assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
    175       assert(mask->break_var == NULL);
    176    }
    177 
    178    assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
    179 
    180    mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
    181    mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
    182    mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
    183    mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
    184    ++mask->loop_stack_size;
    185 
    186    mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
    187    LLVMBuildStore(builder, mask->break_mask, mask->break_var);
    188 
    189    mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
    190 
    191    LLVMBuildBr(builder, mask->loop_block);
    192    LLVMPositionBuilderAtEnd(builder, mask->loop_block);
    193 
    194    mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");
    195 
    196    lp_exec_mask_update(mask);
    197 }
    198 
    199 static void lp_exec_break(struct lp_exec_mask *mask)
    200 {
    201    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    202    LLVMValueRef exec_mask = LLVMBuildNot(builder,
    203                                          mask->exec_mask,
    204                                          "break");
    205 
    206    mask->break_mask = LLVMBuildAnd(builder,
    207                                    mask->break_mask,
    208                                    exec_mask, "break_full");
    209 
    210    lp_exec_mask_update(mask);
    211 }
    212 
    213 static void lp_exec_continue(struct lp_exec_mask *mask)
    214 {
    215    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    216    LLVMValueRef exec_mask = LLVMBuildNot(builder,
    217                                          mask->exec_mask,
    218                                          "");
    219 
    220    mask->cont_mask = LLVMBuildAnd(builder,
    221                                   mask->cont_mask,
    222                                   exec_mask, "");
    223 
    224    lp_exec_mask_update(mask);
    225 }
    226 
    227 
    228 static void lp_exec_endloop(struct gallivm_state *gallivm,
    229                             struct lp_exec_mask *mask)
    230 {
    231    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    232    LLVMBasicBlockRef endloop;
    233    LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
    234    LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
    235                                                mask->bld->type.width *
    236                                                mask->bld->type.length);
    237    LLVMValueRef i1cond, i2cond, icond, limiter;
    238 
    239    assert(mask->break_mask);
    240 
    241    /*
    242     * Restore the cont_mask, but don't pop
    243     */
    244    assert(mask->loop_stack_size);
    245    mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
    246    lp_exec_mask_update(mask);
    247 
    248    /*
    249     * Unlike the continue mask, the break_mask must be preserved across loop
    250     * iterations
    251     */
    252    LLVMBuildStore(builder, mask->break_mask, mask->break_var);
    253 
    254    /* Decrement the loop limiter */
    255    limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");
    256 
    257    limiter = LLVMBuildSub(
    258       builder,
    259       limiter,
    260       LLVMConstInt(int_type, 1, false),
    261       "");
    262 
    263    LLVMBuildStore(builder, limiter, mask->loop_limiter);
    264 
    265    /* i1cond = (mask != 0) */
    266    i1cond = LLVMBuildICmp(
    267       builder,
    268       LLVMIntNE,
    269       LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
    270       LLVMConstNull(reg_type), "");
    271 
    272    /* i2cond = (looplimiter > 0) */
    273    i2cond = LLVMBuildICmp(
    274       builder,
    275       LLVMIntSGT,
    276       limiter,
    277       LLVMConstNull(int_type), "");
    278 
    279    /* if( i1cond && i2cond ) */
    280    icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
    281 
    282    endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
    283 
    284    LLVMBuildCondBr(builder,
    285                    icond, mask->loop_block, endloop);
    286 
    287    LLVMPositionBuilderAtEnd(builder, endloop);
    288 
    289    assert(mask->loop_stack_size);
    290    --mask->loop_stack_size;
    291    mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
    292    mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
    293    mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
    294    mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
    295 
    296    lp_exec_mask_update(mask);
    297 }
    298 
    299 /* stores val into an address pointed to by dst.
    300  * mask->exec_mask is used to figure out which bits of val
    301  * should be stored into the address
    302  * (0 means don't store this bit, 1 means do store).
    303  */
    304 static void lp_exec_mask_store(struct lp_exec_mask *mask,
    305                                struct lp_build_context *bld_store,
    306                                LLVMValueRef pred,
    307                                LLVMValueRef val,
    308                                LLVMValueRef dst)
    309 {
    310    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    311 
    312    /* Mix the predicate and execution mask */
    313    if (mask->has_mask) {
    314       if (pred) {
    315          pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
    316       } else {
    317          pred = mask->exec_mask;
    318       }
    319    }
    320 
    321    if (pred) {
    322       LLVMValueRef real_val, dst_val;
    323 
    324       dst_val = LLVMBuildLoad(builder, dst, "");
    325       real_val = lp_build_select(bld_store,
    326                                  pred,
    327                                  val, dst_val);
    328 
    329       LLVMBuildStore(builder, real_val, dst);
    330    } else
    331       LLVMBuildStore(builder, val, dst);
    332 }
    333 
    334 static void lp_exec_mask_call(struct lp_exec_mask *mask,
    335                               int func,
    336                               int *pc)
    337 {
    338    assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
    339    mask->call_stack[mask->call_stack_size].pc = *pc;
    340    mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
    341    mask->call_stack_size++;
    342    *pc = func;
    343 }
    344 
    345 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
    346 {
    347    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    348    LLVMValueRef exec_mask;
    349 
    350    if (mask->call_stack_size == 0) {
    351       /* returning from main() */
    352       *pc = -1;
    353       return;
    354    }
    355    exec_mask = LLVMBuildNot(builder,
    356                             mask->exec_mask,
    357                             "ret");
    358 
    359    mask->ret_mask = LLVMBuildAnd(builder,
    360                                  mask->ret_mask,
    361                                  exec_mask, "ret_full");
    362 
    363    lp_exec_mask_update(mask);
    364 }
    365 
    /**
     * Start of a subroutine body. Nothing needs to be done to the mask here.
     */
    static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
    {
       (void) mask;
    }
    369 
    370 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
    371 {
    372    assert(mask->call_stack_size);
    373    mask->call_stack_size--;
    374    *pc = mask->call_stack[mask->call_stack_size].pc;
    375    mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
    376    lp_exec_mask_update(mask);
    377 }
    378 
    379 
    380 /**
    381  * Return pointer to a temporary register channel (src or dest).
    382  * Note that indirect addressing cannot be handled here.
    383  * \param index  which temporary register
    384  * \param chan  which channel of the temp register.
    385  */
    386 LLVMValueRef
    387 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
    388              unsigned index,
    389              unsigned chan)
    390 {
    391    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    392    assert(chan < 4);
    393    if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
    394       LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
    395       return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
    396    }
    397    else {
    398       return bld->temps[index][chan];
    399    }
    400 }
    401 
    402 /**
    403  * Return pointer to a output register channel (src or dest).
    404  * Note that indirect addressing cannot be handled here.
    405  * \param index  which output register
    406  * \param chan  which channel of the output register.
    407  */
    408 LLVMValueRef
    409 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
    410                unsigned index,
    411                unsigned chan)
    412 {
    413    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    414    assert(chan < 4);
    415    if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
    416       LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
    417                                                  index * 4 + chan);
    418       return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
    419    }
    420    else {
    421       return bld->outputs[index][chan];
    422    }
    423 }
    424 
    425 /**
    426  * Gather vector.
    427  * XXX the lp_build_gather() function should be capable of doing this
    428  * with a little work.
    429  */
    430 static LLVMValueRef
    431 build_gather(struct lp_build_context *bld,
    432              LLVMValueRef base_ptr,
    433              LLVMValueRef indexes)
    434 {
    435    LLVMBuilderRef builder = bld->gallivm->builder;
    436    LLVMValueRef res = bld->undef;
    437    unsigned i;
    438 
    439    /*
    440     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    441     */
    442    for (i = 0; i < bld->type.length; i++) {
    443       LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
    444       LLVMValueRef index = LLVMBuildExtractElement(builder,
    445                                                    indexes, ii, "");
    446       LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
    447                                              &index, 1, "gather_ptr");
    448       LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
    449 
    450       res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
    451    }
    452 
    453    return res;
    454 }
    455 
    456 
    457 /**
    458  * Scatter/store vector.
    459  */
    460 static void
    461 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
    462                   LLVMValueRef base_ptr,
    463                   LLVMValueRef indexes,
    464                   LLVMValueRef values,
    465                   struct lp_exec_mask *mask,
    466                   LLVMValueRef pred)
    467 {
    468    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    469    LLVMBuilderRef builder = gallivm->builder;
    470    unsigned i;
    471 
    472    /* Mix the predicate and execution mask */
    473    if (mask->has_mask) {
    474       if (pred) {
    475          pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
    476       }
    477       else {
    478          pred = mask->exec_mask;
    479       }
    480    }
    481 
    482    /*
    483     * Loop over elements of index_vec, store scalar value.
    484     */
    485    for (i = 0; i < bld->bld_base.base.type.length; i++) {
    486       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
    487       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
    488       LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
    489       LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
    490       LLVMValueRef scalar_pred = pred ?
    491          LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
    492 
    493       if (0)
    494          lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
    495                          ii, val, index, scalar_ptr);
    496 
    497       if (scalar_pred) {
    498          LLVMValueRef real_val, dst_val;
    499          dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
    500          real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
    501          LLVMBuildStore(builder, real_val, scalar_ptr);
    502       }
    503       else {
    504          LLVMBuildStore(builder, val, scalar_ptr);
    505       }
    506    }
    507 }
    508 
    509 
    510 /**
    511  * Read the current value of the ADDR register, convert the floats to
    512  * ints, add the base index and return the vector of offsets.
    513  * The offsets will be used to index into the constant buffer or
    514  * temporary register file.
    515  */
    516 static LLVMValueRef
    517 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
    518                    unsigned reg_file, unsigned reg_index,
    519                    const struct tgsi_src_register *indirect_reg)
    520 {
    521    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    522    struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
    523    /* always use X component of address register */
    524    unsigned swizzle = indirect_reg->SwizzleX;
    525    LLVMValueRef base;
    526    LLVMValueRef rel;
    527    LLVMValueRef max_index;
    528    LLVMValueRef index;
    529 
    530    assert(bld->indirect_files & (1 << reg_file));
    531 
    532    base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
    533 
    534    assert(swizzle < 4);
    535    rel = LLVMBuildLoad(builder,
    536                         bld->addr[indirect_reg->Index][swizzle],
    537                         "load addr reg");
    538 
    539    index = lp_build_add(uint_bld, base, rel);
    540 
    541    max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
    542                                       uint_bld->type,
    543                                       bld->bld_base.info->file_max[reg_file]);
    544 
    545    assert(!uint_bld->type.sign);
    546    index = lp_build_min(uint_bld, index, max_index);
    547 
    548    return index;
    549 }
    550 
    551 static struct lp_build_context *
    552 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
    553 	       enum tgsi_opcode_type stype)
    554 {
    555    struct lp_build_context *bld_fetch;
    556 
    557    switch (stype) {
    558    case TGSI_TYPE_FLOAT:
    559    case TGSI_TYPE_UNTYPED:
    560       bld_fetch = &bld_base->base;
    561       break;
    562    case TGSI_TYPE_UNSIGNED:
    563       bld_fetch = &bld_base->uint_bld;
    564       break;
    565    case TGSI_TYPE_SIGNED:
    566       bld_fetch = &bld_base->int_bld;
    567       break;
    568    case TGSI_TYPE_VOID:
    569    case TGSI_TYPE_DOUBLE:
    570    default:
    571       assert(0);
    572       bld_fetch = NULL;
    573       break;
    574    }
    575    return bld_fetch;
    576 }
    577 
    578 static LLVMValueRef
    579 emit_fetch_constant(
    580    struct lp_build_tgsi_context * bld_base,
    581    const struct tgsi_full_src_register * reg,
    582    enum tgsi_opcode_type stype,
    583    unsigned swizzle)
    584 {
    585    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
    586    struct gallivm_state *gallivm = bld_base->base.gallivm;
    587    LLVMBuilderRef builder = gallivm->builder;
    588    struct lp_build_context *uint_bld = &bld_base->uint_bld;
    589    LLVMValueRef indirect_index = NULL;
    590    struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
    591 
    592    /* XXX: Handle fetching xyzw components as a vector */
    593    assert(swizzle != ~0);
    594 
    595    if (reg->Register.Indirect) {
    596       indirect_index = get_indirect_index(bld,
    597                                           reg->Register.File,
    598                                           reg->Register.Index,
    599                                           &reg->Indirect);
    600    }
    601 
    602    if (reg->Register.Indirect) {
    603       LLVMValueRef swizzle_vec =
    604          lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
    605       LLVMValueRef index_vec;  /* index into the const buffer */
    606 
    607       /* index_vec = indirect_index * 4 + swizzle */
    608       index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
    609       index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
    610 
    611       /* Gather values from the constant buffer */
    612       return build_gather(bld_fetch, bld->consts_ptr, index_vec);
    613    }
    614    else {
    615       LLVMValueRef index;  /* index into the const buffer */
    616       LLVMValueRef scalar, scalar_ptr;
    617 
    618       index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
    619 
    620       scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
    621                                    &index, 1, "");
    622 
    623       if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) {
    624          LLVMTypeRef ivtype = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0);
    625          LLVMValueRef temp_ptr;
    626          temp_ptr = LLVMBuildBitCast(builder, scalar_ptr, ivtype, "");
    627          scalar = LLVMBuildLoad(builder, temp_ptr, "");
    628       } else
    629          scalar = LLVMBuildLoad(builder, scalar_ptr, "");
    630 
    631       return lp_build_broadcast_scalar(bld_fetch, scalar);
    632    }
    633 }
    634 
    635 static LLVMValueRef
    636 emit_fetch_immediate(
    637    struct lp_build_tgsi_context * bld_base,
    638    const struct tgsi_full_src_register * reg,
    639    enum tgsi_opcode_type stype,
    640    unsigned swizzle)
    641 {
    642    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
    643    LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle];
    644    assert(res);
    645 
    646    if (stype == TGSI_TYPE_UNSIGNED) {
    647       res = LLVMConstBitCast(res, bld_base->uint_bld.vec_type);
    648    } else if (stype == TGSI_TYPE_SIGNED) {
    649       res = LLVMConstBitCast(res, bld_base->int_bld.vec_type);
    650    }
    651    return res;
    652 }
    653 
    654 static LLVMValueRef
    655 emit_fetch_input(
    656    struct lp_build_tgsi_context * bld_base,
    657    const struct tgsi_full_src_register * reg,
    658    enum tgsi_opcode_type stype,
    659    unsigned swizzle)
    660 {
    661    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
    662    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    663    LLVMBuilderRef builder = gallivm->builder;
    664    struct lp_build_context *uint_bld = &bld_base->uint_bld;
    665    LLVMValueRef indirect_index = NULL;
    666    LLVMValueRef res;
    667 
    668    if (reg->Register.Indirect) {
    669       indirect_index = get_indirect_index(bld,
    670                                           reg->Register.File,
    671                                           reg->Register.Index,
    672                                           &reg->Indirect);
    673    }
    674 
    675    if (reg->Register.Indirect) {
    676       LLVMValueRef swizzle_vec =
    677          lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
    678       LLVMValueRef length_vec =
    679          lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
    680       LLVMValueRef index_vec;  /* index into the const buffer */
    681       LLVMValueRef inputs_array;
    682       LLVMTypeRef float4_ptr_type;
    683 
    684       /* index_vec = (indirect_index * 4 + swizzle) * length */
    685       index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
    686       index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
    687       index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
    688 
    689       /* cast inputs_array pointer to float* */
    690       float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
    691       inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
    692                                          float4_ptr_type, "");
    693 
    694       /* Gather values from the temporary register array */
    695       res = build_gather(&bld_base->base, inputs_array, index_vec);
    696    } else {
    697       if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
    698          LLVMValueRef lindex = lp_build_const_int32(gallivm,
    699                                         reg->Register.Index * 4 + swizzle);
    700          LLVMValueRef input_ptr =  LLVMBuildGEP(builder,
    701                                                 bld->inputs_array, &lindex, 1, "");
    702          res = LLVMBuildLoad(builder, input_ptr, "");
    703       }
    704       else {
    705          res = bld->inputs[reg->Register.Index][swizzle];
    706       }
    707    }
    708 
    709    assert(res);
    710 
    711    if (stype == TGSI_TYPE_UNSIGNED) {
    712       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
    713    } else if (stype == TGSI_TYPE_SIGNED) {
    714       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
    715    }
    716 
    717    return res;
    718 }
    719 
    720 static LLVMValueRef
    721 emit_fetch_temporary(
    722    struct lp_build_tgsi_context * bld_base,
    723    const struct tgsi_full_src_register * reg,
    724    enum tgsi_opcode_type stype,
    725    unsigned swizzle)
    726 {
    727    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
    728    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    729    LLVMBuilderRef builder = gallivm->builder;
    730    struct lp_build_context *uint_bld = &bld_base->uint_bld;
    731    LLVMValueRef indirect_index = NULL;
    732    LLVMValueRef res;
    733 
    734    if (reg->Register.Indirect) {
    735       indirect_index = get_indirect_index(bld,
    736                                           reg->Register.File,
    737                                           reg->Register.Index,
    738                                           &reg->Indirect);
    739    }
    740 
    741    if (reg->Register.Indirect) {
    742       LLVMValueRef swizzle_vec =
    743          lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
    744       LLVMValueRef length_vec =
    745          lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
    746                                 bld->bld_base.base.type.length);
    747       LLVMValueRef index_vec;  /* index into the const buffer */
    748       LLVMValueRef temps_array;
    749       LLVMTypeRef float4_ptr_type;
    750 
    751       /* index_vec = (indirect_index * 4 + swizzle) * length */
    752       index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
    753       index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
    754       index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
    755 
    756       /* cast temps_array pointer to float* */
    757       float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
    758       temps_array = LLVMBuildBitCast(builder, bld->temps_array,
    759                                      float4_ptr_type, "");
    760 
    761       /* Gather values from the temporary register array */
    762       res = build_gather(&bld_base->base, temps_array, index_vec);
    763    }
    764    else {
    765       LLVMValueRef temp_ptr;
    766       if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) {
    767          LLVMTypeRef itype = LLVMPointerType(bld->bld_base.int_bld.vec_type, 0);
    768          LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
    769                                                      swizzle);
    770          temp_ptr = LLVMBuildBitCast(builder, tint_ptr, itype, "");
    771       } else
    772          temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
    773       res = LLVMBuildLoad(builder, temp_ptr, "");
    774       if (!res)
    775          return bld->bld_base.base.undef;
    776    }
    777 
    778    return res;
    779 }
    780 
    781 static LLVMValueRef
    782 emit_fetch_system_value(
    783    struct lp_build_tgsi_context * bld_base,
    784    const struct tgsi_full_src_register * reg,
    785    enum tgsi_opcode_type stype,
    786    unsigned swizzle)
    787 {
    788    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
    789    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    790    const struct tgsi_shader_info *info = bld->bld_base.info;
    791    LLVMBuilderRef builder = gallivm->builder;
    792    LLVMValueRef res;
    793    enum tgsi_opcode_type atype; // Actual type of the value
    794 
    795    assert(!reg->Register.Indirect);
    796 
    797    switch (info->system_value_semantic_name[reg->Register.Index]) {
    798    case TGSI_SEMANTIC_INSTANCEID:
    799       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
    800       atype = TGSI_TYPE_UNSIGNED;
    801       break;
    802 
    803    case TGSI_SEMANTIC_VERTEXID:
    804       res = bld->system_values.vertex_id;
    805       atype = TGSI_TYPE_UNSIGNED;
    806       break;
    807 
    808    default:
    809       assert(!"unexpected semantic in emit_fetch_system_value");
    810       res = bld_base->base.zero;
    811       atype = TGSI_TYPE_FLOAT;
    812       break;
    813    }
    814 
    815    if (atype != stype) {
    816       if (stype == TGSI_TYPE_FLOAT) {
    817          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
    818       } else if (stype == TGSI_TYPE_UNSIGNED) {
    819          res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
    820       } else if (stype == TGSI_TYPE_SIGNED) {
    821          res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
    822       }
    823    }
    824 
    825    return res;
    826 }
    827 
    828 /**
    829  * Register fetch with derivatives.
    830  */
    831 static void
    832 emit_fetch_deriv(
    833    struct lp_build_tgsi_soa_context *bld,
    834    LLVMValueRef src,
    835    LLVMValueRef *res,
    836    LLVMValueRef *ddx,
    837    LLVMValueRef *ddy)
    838 {
    839    if(res)
    840       *res = src;
    841 
    842    /* TODO: use interpolation coeffs for inputs */
    843 
    844    if(ddx)
    845       *ddx = lp_build_ddx(&bld->bld_base.base, src);
    846 
    847    if(ddy)
    848       *ddy = lp_build_ddy(&bld->bld_base.base, src);
    849 }
    850 
    851 
    852 /**
    853  * Predicate.
    854  */
    855 static void
    856 emit_fetch_predicate(
    857    struct lp_build_tgsi_soa_context *bld,
    858    const struct tgsi_full_instruction *inst,
    859    LLVMValueRef *pred)
    860 {
    861    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    862    unsigned index;
    863    unsigned char swizzles[4];
    864    LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
    865    LLVMValueRef value;
    866    unsigned chan;
    867 
    868    if (!inst->Instruction.Predicate) {
    869       TGSI_FOR_EACH_CHANNEL( chan ) {
    870          pred[chan] = NULL;
    871       }
    872       return;
    873    }
    874 
    875    swizzles[0] = inst->Predicate.SwizzleX;
    876    swizzles[1] = inst->Predicate.SwizzleY;
    877    swizzles[2] = inst->Predicate.SwizzleZ;
    878    swizzles[3] = inst->Predicate.SwizzleW;
    879 
    880    index = inst->Predicate.Index;
    881    assert(index < LP_MAX_TGSI_PREDS);
    882 
    883    TGSI_FOR_EACH_CHANNEL( chan ) {
    884       unsigned swizzle = swizzles[chan];
    885 
    886       /*
    887        * Only fetch the predicate register channels that are actually listed
    888        * in the swizzles
    889        */
    890       if (!unswizzled[swizzle]) {
    891          value = LLVMBuildLoad(builder,
    892                                bld->preds[index][swizzle], "");
    893 
    894          /*
    895           * Convert the value to an integer mask.
    896           *
    897           * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
    898           * is needlessly causing two comparisons due to storing the intermediate
    899           * result as float vector instead of an integer mask vector.
    900           */
    901          value = lp_build_compare(bld->bld_base.base.gallivm,
    902                                   bld->bld_base.base.type,
    903                                   PIPE_FUNC_NOTEQUAL,
    904                                   value,
    905                                   bld->bld_base.base.zero);
    906          if (inst->Predicate.Negate) {
    907             value = LLVMBuildNot(builder, value, "");
    908          }
    909 
    910          unswizzled[swizzle] = value;
    911       } else {
    912          value = unswizzled[swizzle];
    913       }
    914 
    915       pred[chan] = value;
    916    }
    917 }
    918 
    919 /**
    920  * Register store.
    921  */
    922 static void
    923 emit_store_chan(
    924    struct lp_build_tgsi_context *bld_base,
    925    const struct tgsi_full_instruction *inst,
    926    unsigned index,
    927    unsigned chan_index,
    928    LLVMValueRef pred,
    929    LLVMValueRef value)
    930 {
    931    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
    932    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    933    LLVMBuilderRef builder = gallivm->builder;
    934    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
    935    struct lp_build_context *uint_bld = &bld_base->uint_bld;
    936    LLVMValueRef indirect_index = NULL;
    937    struct lp_build_context *bld_store;
    938    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
    939 
    940    switch (dtype) {
    941    default:
    942    case TGSI_TYPE_FLOAT:
    943    case TGSI_TYPE_UNTYPED:
    944       bld_store = &bld_base->base;
    945       break;
    946    case TGSI_TYPE_UNSIGNED:
    947       bld_store = &bld_base->uint_bld;
    948       break;
    949    case TGSI_TYPE_SIGNED:
    950       bld_store = &bld_base->int_bld;
    951       break;
    952    case TGSI_TYPE_DOUBLE:
    953    case TGSI_TYPE_VOID:
    954       assert(0);
    955       bld_store = NULL;
    956       break;
    957    }
    958 
    959    switch( inst->Instruction.Saturate ) {
    960    case TGSI_SAT_NONE:
    961       break;
    962 
    963    case TGSI_SAT_ZERO_ONE:
    964       value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
    965       value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
    966       break;
    967 
    968    case TGSI_SAT_MINUS_PLUS_ONE:
    969       value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
    970       value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
    971       break;
    972 
    973    default:
    974       assert(0);
    975    }
    976 
    977    if (reg->Register.Indirect) {
    978       indirect_index = get_indirect_index(bld,
    979                                           reg->Register.File,
    980                                           reg->Register.Index,
    981                                           &reg->Indirect);
    982    } else {
    983       assert(reg->Register.Index <=
    984                              bld->bld_base.info->file_max[reg->Register.File]);
    985    }
    986 
    987    switch( reg->Register.File ) {
    988    case TGSI_FILE_OUTPUT:
    989       if (reg->Register.Indirect) {
    990          LLVMValueRef chan_vec =
    991             lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
    992          LLVMValueRef length_vec =
    993             lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
    994          LLVMValueRef index_vec;  /* indexes into the temp registers */
    995          LLVMValueRef outputs_array;
    996          LLVMValueRef pixel_offsets;
    997          LLVMTypeRef float_ptr_type;
    998          int i;
    999 
   1000          /* build pixel offset vector: {0, 1, 2, 3, ...} */
   1001          pixel_offsets = uint_bld->undef;
   1002          for (i = 0; i < bld->bld_base.base.type.length; i++) {
   1003             LLVMValueRef ii = lp_build_const_int32(gallivm, i);
   1004             pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
   1005                                                    ii, ii, "");
   1006          }
   1007 
   1008          /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
   1009          index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
   1010          index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
   1011          index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
   1012          index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
   1013 
   1014          float_ptr_type =
   1015             LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
   1016          outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
   1017                                           float_ptr_type, "");
   1018 
   1019          /* Scatter store values into temp registers */
   1020          emit_mask_scatter(bld, outputs_array, index_vec, value,
   1021                            &bld->exec_mask, pred);
   1022       }
   1023       else {
   1024          LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
   1025                                                chan_index);
   1026          lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, out_ptr);
   1027       }
   1028       break;
   1029 
   1030    case TGSI_FILE_TEMPORARY:
   1031       if (reg->Register.Indirect) {
   1032          LLVMValueRef chan_vec =
   1033             lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
   1034          LLVMValueRef length_vec =
   1035             lp_build_const_int_vec(gallivm, uint_bld->type,
   1036                                    bld->bld_base.base.type.length);
   1037          LLVMValueRef index_vec;  /* indexes into the temp registers */
   1038          LLVMValueRef temps_array;
   1039          LLVMValueRef pixel_offsets;
   1040          LLVMTypeRef float_ptr_type;
   1041          int i;
   1042 
   1043          /* build pixel offset vector: {0, 1, 2, 3, ...} */
   1044          pixel_offsets = uint_bld->undef;
   1045          for (i = 0; i < bld->bld_base.base.type.length; i++) {
   1046             LLVMValueRef ii = lp_build_const_int32(gallivm, i);
   1047             pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
   1048                                                    ii, ii, "");
   1049          }
   1050 
   1051          /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
   1052          index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
   1053          index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
   1054          index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
   1055          index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
   1056 
   1057          float_ptr_type =
   1058             LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
   1059          temps_array = LLVMBuildBitCast(builder, bld->temps_array,
   1060                                         float_ptr_type, "");
   1061 
   1062          /* Scatter store values into temp registers */
   1063          emit_mask_scatter(bld, temps_array, index_vec, value,
   1064                            &bld->exec_mask, pred);
   1065       }
   1066       else {
   1067          LLVMValueRef temp_ptr;
   1068 
   1069          switch (dtype) {
   1070          case TGSI_TYPE_UNSIGNED:
   1071          case TGSI_TYPE_SIGNED: {
   1072             LLVMTypeRef itype = bld_base->int_bld.vec_type;
   1073             LLVMTypeRef ivtype = LLVMPointerType(itype, 0);
   1074             LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
   1075                                                         chan_index);
   1076             LLVMValueRef temp_value_ptr;
   1077 
   1078             temp_ptr = LLVMBuildBitCast(builder, tint_ptr, ivtype, "");
   1079             temp_value_ptr = LLVMBuildBitCast(builder, value, itype, "");
   1080             value = temp_value_ptr;
   1081             break;
   1082          }
   1083          default:
   1084          case TGSI_TYPE_FLOAT:
   1085          case TGSI_TYPE_UNTYPED:
   1086             temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
   1087                                            chan_index);
   1088             break;
   1089          }
   1090 
   1091          lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, temp_ptr);
   1092       }
   1093       break;
   1094 
   1095    case TGSI_FILE_ADDRESS:
   1096       assert(dtype == TGSI_TYPE_SIGNED);
   1097       assert(LLVMTypeOf(value) == bld_base->base.int_vec_type);
   1098       lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
   1099                          bld->addr[reg->Register.Index][chan_index]);
   1100       break;
   1101 
   1102    case TGSI_FILE_PREDICATE:
   1103       lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
   1104                          bld->preds[reg->Register.Index][chan_index]);
   1105       break;
   1106 
   1107    default:
   1108       assert( 0 );
   1109    }
   1110 }
   1111 
   1112 static void
   1113 emit_store(
   1114    struct lp_build_tgsi_context * bld_base,
   1115    const struct tgsi_full_instruction * inst,
   1116    const struct tgsi_opcode_info * info,
   1117    LLVMValueRef dst[4])
   1118 
   1119 {
   1120    unsigned chan_index;
   1121    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1122 
   1123    if(info->num_dst) {
   1124       LLVMValueRef pred[TGSI_NUM_CHANNELS];
   1125 
   1126       emit_fetch_predicate( bld, inst, pred );
   1127 
   1128       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
   1129          emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
   1130       }
   1131    }
   1132 }
   1133 
   1134 /**
   1135  * High-level instruction translators.
   1136  */
   1137 
   1138 static void
   1139 emit_tex( struct lp_build_tgsi_soa_context *bld,
   1140           const struct tgsi_full_instruction *inst,
   1141           enum lp_build_tex_modifier modifier,
   1142           LLVMValueRef *texel)
   1143 {
   1144    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   1145    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   1146    unsigned unit;
   1147    LLVMValueRef lod_bias, explicit_lod;
   1148    LLVMValueRef oow = NULL;
   1149    LLVMValueRef coords[3];
   1150    struct lp_derivatives derivs;
   1151    unsigned num_coords;
   1152    unsigned dims;
   1153    unsigned i;
   1154 
   1155    if (!bld->sampler) {
   1156       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
   1157       for (i = 0; i < 4; i++) {
   1158          texel[i] = bld->bld_base.base.undef;
   1159       }
   1160       return;
   1161    }
   1162 
   1163    derivs.ddx_ddy[0] = bld->bld_base.base.undef;
   1164    derivs.ddx_ddy[1] = bld->bld_base.base.undef;
   1165 
   1166    switch (inst->Texture.Texture) {
   1167    case TGSI_TEXTURE_1D:
   1168       num_coords = 1;
   1169       dims = 1;
   1170       break;
   1171    case TGSI_TEXTURE_1D_ARRAY:
   1172       num_coords = 2;
   1173       dims = 1;
   1174       break;
   1175    case TGSI_TEXTURE_2D:
   1176    case TGSI_TEXTURE_RECT:
   1177       num_coords = 2;
   1178       dims = 2;
   1179       break;
   1180    case TGSI_TEXTURE_SHADOW1D:
   1181    case TGSI_TEXTURE_SHADOW1D_ARRAY:
   1182       num_coords = 3;
   1183       dims = 1;
   1184       break;
   1185    case TGSI_TEXTURE_SHADOW2D:
   1186    case TGSI_TEXTURE_SHADOWRECT:
   1187    case TGSI_TEXTURE_2D_ARRAY:
   1188    case TGSI_TEXTURE_CUBE:
   1189       num_coords = 3;
   1190       dims = 2;
   1191       break;
   1192    case TGSI_TEXTURE_3D:
   1193       num_coords = 3;
   1194       dims = 3;
   1195       break;
   1196    case TGSI_TEXTURE_SHADOW2D_ARRAY:
   1197       num_coords = 4;
   1198       dims = 2;
   1199       break;
   1200    default:
   1201       assert(0);
   1202       return;
   1203    }
   1204 
   1205    if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
   1206       lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
   1207       explicit_lod = NULL;
   1208    }
   1209    else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
   1210       lod_bias = NULL;
   1211       explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
   1212    }
   1213    else {
   1214       lod_bias = NULL;
   1215       explicit_lod = NULL;
   1216    }
   1217 
   1218    if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
   1219       oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
   1220       oow = lp_build_rcp(&bld->bld_base.base, oow);
   1221    }
   1222 
   1223    for (i = 0; i < num_coords; i++) {
   1224       coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
   1225       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
   1226          coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
   1227    }
   1228    for (i = num_coords; i < 3; i++) {
   1229       coords[i] = bld->bld_base.base.undef;
   1230    }
   1231 
   1232    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
   1233       LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
   1234       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
   1235       LLVMValueRef ddxdyonec[3];
   1236       unsigned length = bld->bld_base.base.type.length;
   1237       unsigned num_quads = length / 4;
   1238       unsigned dim;
   1239       unsigned quad;
   1240 
   1241       for (dim = 0; dim < dims; ++dim) {
   1242          LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim );
   1243          LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim );
   1244          for (quad = 0; quad < num_quads; ++quad) {
   1245             unsigned s1 = 4*quad;
   1246             unsigned s2 = 4*quad + length;
   1247             shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
   1248             shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2);
   1249             shuffles[4*quad + 2] = i32undef;
   1250             shuffles[4*quad + 3] = i32undef;
   1251          }
   1252          ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy,
   1253                                                LLVMConstVector(shuffles, length), "");
   1254       }
   1255       if (dims == 1) {
   1256          derivs.ddx_ddy[0] = ddxdyonec[0];
   1257       }
   1258       else if (dims >= 2) {
   1259          for (quad = 0; quad < num_quads; ++quad) {
   1260             unsigned s1 = 4*quad;
   1261             unsigned s2 = 4*quad + length;
   1262             shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
   1263             shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1);
   1264             shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2);
   1265             shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1);
   1266          }
   1267          derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0], ddxdyonec[1],
   1268                                                   LLVMConstVector(shuffles, length), "");
   1269          if (dims == 3) {
   1270             derivs.ddx_ddy[1] = ddxdyonec[2];
   1271          }
   1272       }
   1273       unit = inst->Src[3].Register.Index;
   1274    }  else {
   1275       if (dims == 1) {
   1276          derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]);
   1277       }
   1278       else if (dims >= 2) {
   1279          derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base,
   1280                                                             coords[0], coords[1]);
   1281          if (dims == 3) {
   1282             derivs.ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]);
   1283          }
   1284       }
   1285       unit = inst->Src[1].Register.Index;
   1286    }
   1287 
   1288    bld->sampler->emit_fetch_texel(bld->sampler,
   1289                                   bld->bld_base.base.gallivm,
   1290                                   bld->bld_base.base.type,
   1291                                   unit, num_coords, coords,
   1292                                   &derivs,
   1293                                   lod_bias, explicit_lod,
   1294                                   texel);
   1295 }
   1296 
   1297 static void
   1298 emit_txq( struct lp_build_tgsi_soa_context *bld,
   1299           const struct tgsi_full_instruction *inst,
   1300           LLVMValueRef *sizes_out)
   1301 {
   1302    LLVMValueRef explicit_lod;
   1303    unsigned num_coords, has_lod;
   1304    unsigned i;
   1305 
   1306    switch (inst->Texture.Texture) {
   1307    case TGSI_TEXTURE_1D:
   1308    case TGSI_TEXTURE_SHADOW1D:
   1309    case TGSI_TEXTURE_SHADOW2D:
   1310    case TGSI_TEXTURE_SHADOWCUBE:
   1311       num_coords = 1;
   1312       has_lod = 1;
   1313       break;
   1314    case TGSI_TEXTURE_2D:
   1315    case TGSI_TEXTURE_CUBE:
   1316    case TGSI_TEXTURE_1D_ARRAY:
   1317    case TGSI_TEXTURE_SHADOW1D_ARRAY:
   1318       num_coords = 2;
   1319       has_lod = 1;
   1320       break;
   1321    case TGSI_TEXTURE_3D:
   1322 // case TGSI_TEXTURE_CUBE_ARRAY:
   1323 // case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
   1324    case TGSI_TEXTURE_2D_ARRAY:
   1325    case TGSI_TEXTURE_SHADOW2D_ARRAY:
   1326       num_coords = 3;
   1327       has_lod = 1;
   1328       break;
   1329 
   1330    case TGSI_TEXTURE_BUFFER:
   1331       num_coords = 1;
   1332       has_lod = 0;
   1333       break;
   1334 
   1335    case TGSI_TEXTURE_RECT:
   1336    case TGSI_TEXTURE_SHADOWRECT:
   1337 // case TGSI_TEXTURE_2D_MS:
   1338       num_coords = 2;
   1339       has_lod = 0;
   1340       break;
   1341 
   1342 // case TGSI_TEXTURE_2D_MS_ARRAY:
   1343 //    num_coords = 3;
   1344 //    has_lod = 0;
   1345 //    break;
   1346 
   1347    default:
   1348       assert(0);
   1349       return;
   1350    }
   1351 
   1352    if (!bld->sampler) {
   1353       _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
   1354       for (i = 0; i < num_coords; i++)
   1355          sizes_out[i] = bld->bld_base.base.undef;
   1356       return;
   1357    }
   1358 
   1359    if (has_lod)
   1360       explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 2 );
   1361    else
   1362       explicit_lod = NULL;
   1363 
   1364    bld->sampler->emit_size_query(bld->sampler,
   1365                                  bld->bld_base.base.gallivm,
   1366                                  bld->bld_base.int_bld.type,
   1367                                  inst->Src[1].Register.Index,
   1368                                  explicit_lod,
   1369                                  sizes_out);
   1370 }
   1371 
   1372 static boolean
   1373 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
   1374 		   int pc)
   1375 {
   1376    int i;
   1377 
   1378    for (i = 0; i < 5; i++) {
   1379       unsigned opcode;
   1380 
   1381       if (pc + i >= bld->bld_base.info->num_instructions)
   1382 	 return TRUE;
   1383 
   1384       opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
   1385 
   1386       if (opcode == TGSI_OPCODE_END)
   1387 	 return TRUE;
   1388 
   1389       if (opcode == TGSI_OPCODE_TEX ||
   1390 	  opcode == TGSI_OPCODE_TXP ||
   1391 	  opcode == TGSI_OPCODE_TXD ||
   1392 	  opcode == TGSI_OPCODE_TXB ||
   1393 	  opcode == TGSI_OPCODE_TXL ||
   1394 	  opcode == TGSI_OPCODE_TXF ||
   1395 	  opcode == TGSI_OPCODE_TXQ ||
   1396 	  opcode == TGSI_OPCODE_CAL ||
   1397 	  opcode == TGSI_OPCODE_CALLNZ ||
   1398 	  opcode == TGSI_OPCODE_IF ||
   1399 	  opcode == TGSI_OPCODE_IFC ||
   1400 	  opcode == TGSI_OPCODE_BGNLOOP ||
   1401 	  opcode == TGSI_OPCODE_SWITCH)
   1402 	 return FALSE;
   1403    }
   1404 
   1405    return TRUE;
   1406 }
   1407 
   1408 
   1409 
   1410 /**
   1411  * Kill fragment if any of the src register values are negative.
   1412  */
   1413 static void
   1414 emit_kil(
   1415    struct lp_build_tgsi_soa_context *bld,
   1416    const struct tgsi_full_instruction *inst,
   1417    int pc)
   1418 {
   1419    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   1420    const struct tgsi_full_src_register *reg = &inst->Src[0];
   1421    LLVMValueRef terms[TGSI_NUM_CHANNELS];
   1422    LLVMValueRef mask;
   1423    unsigned chan_index;
   1424 
   1425    memset(&terms, 0, sizeof terms);
   1426 
   1427    TGSI_FOR_EACH_CHANNEL( chan_index ) {
   1428       unsigned swizzle;
   1429 
   1430       /* Unswizzle channel */
   1431       swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
   1432 
   1433       /* Check if the component has not been already tested. */
   1434       assert(swizzle < TGSI_NUM_CHANNELS);
   1435       if( !terms[swizzle] )
   1436          /* TODO: change the comparison operator instead of setting the sign */
   1437          terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
   1438    }
   1439 
   1440    mask = NULL;
   1441    TGSI_FOR_EACH_CHANNEL( chan_index ) {
   1442       if(terms[chan_index]) {
   1443          LLVMValueRef chan_mask;
   1444 
   1445          /*
   1446           * If term < 0 then mask = 0 else mask = ~0.
   1447           */
   1448          chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
   1449 
   1450          if(mask)
   1451             mask = LLVMBuildAnd(builder, mask, chan_mask, "");
   1452          else
   1453             mask = chan_mask;
   1454       }
   1455    }
   1456 
   1457    if(mask) {
   1458       lp_build_mask_update(bld->mask, mask);
   1459 
   1460       if (!near_end_of_shader(bld, pc))
   1461 	 lp_build_mask_check(bld->mask);
   1462    }
   1463 }
   1464 
   1465 
   1466 /**
   1467  * Predicated fragment kill.
   1468  * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
   1469  * The only predication is the execution mask which will apply if
   1470  * we're inside a loop or conditional.
   1471  */
   1472 static void
   1473 emit_kilp(struct lp_build_tgsi_soa_context *bld,
   1474           int pc)
   1475 {
   1476    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   1477    LLVMValueRef mask;
   1478 
   1479    /* For those channels which are "alive", disable fragment shader
   1480     * execution.
   1481     */
   1482    if (bld->exec_mask.has_mask) {
   1483       mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
   1484    }
   1485    else {
   1486       LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
   1487       mask = zero;
   1488    }
   1489 
   1490    lp_build_mask_update(bld->mask, mask);
   1491 
   1492    if (!near_end_of_shader(bld, pc))
   1493       lp_build_mask_check(bld->mask);
   1494 }
   1495 
   1496 
   1497 /**
   1498  * Emit code which will dump the value of all the temporary registers
   1499  * to stdout.
   1500  */
   1501 static void
   1502 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
   1503 {
   1504    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   1505    LLVMBuilderRef builder = gallivm->builder;
   1506    LLVMValueRef temp_ptr;
   1507    LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
   1508    LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
   1509    LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
   1510    LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
   1511    int index;
   1512    int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
   1513 
   1514    for (index = 0; index < n; index++) {
   1515       LLVMValueRef idx = lp_build_const_int32(gallivm, index);
   1516       LLVMValueRef v[4][4], res;
   1517       int chan;
   1518 
   1519       lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
   1520 
   1521       for (chan = 0; chan < 4; chan++) {
   1522          temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
   1523          res = LLVMBuildLoad(builder, temp_ptr, "");
   1524          v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
   1525          v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
   1526          v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
   1527          v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
   1528       }
   1529 
   1530       lp_build_printf(gallivm, "  X: %f %f %f %f\n",
   1531                       v[0][0], v[0][1], v[0][2], v[0][3]);
   1532       lp_build_printf(gallivm, "  Y: %f %f %f %f\n",
   1533                       v[1][0], v[1][1], v[1][2], v[1][3]);
   1534       lp_build_printf(gallivm, "  Z: %f %f %f %f\n",
   1535                       v[2][0], v[2][1], v[2][2], v[2][3]);
   1536       lp_build_printf(gallivm, "  W: %f %f %f %f\n",
   1537                       v[3][0], v[3][1], v[3][2], v[3][3]);
   1538    }
   1539 }
   1540 
   1541 
   1542 
   1543 void
   1544 lp_emit_declaration_soa(
   1545    struct lp_build_tgsi_context *bld_base,
   1546    const struct tgsi_full_declaration *decl)
   1547 {
   1548    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   1549    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   1550    LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   1551    const unsigned first = decl->Range.First;
   1552    const unsigned last = decl->Range.Last;
   1553    unsigned idx, i;
   1554 
   1555    for (idx = first; idx <= last; ++idx) {
   1556       assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
   1557       switch (decl->Declaration.File) {
   1558       case TGSI_FILE_TEMPORARY:
   1559          assert(idx < LP_MAX_TGSI_TEMPS);
   1560          if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
   1561             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
   1562                bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
   1563          }
   1564          break;
   1565 
   1566       case TGSI_FILE_OUTPUT:
   1567          if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
   1568             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
   1569                bld->outputs[idx][i] = lp_build_alloca(gallivm,
   1570                                                       vec_type, "output");
   1571          }
   1572          break;
   1573 
   1574       case TGSI_FILE_ADDRESS:
   1575 	 /* ADDR registers are the only allocated with an integer LLVM IR type,
   1576 	  * as they are guaranteed to always have integers.
   1577 	  * XXX: Not sure if this exception is worthwhile (or the whole idea of
   1578 	  * an ADDR register for that matter).
   1579 	  */
   1580          assert(idx < LP_MAX_TGSI_ADDRS);
   1581          for (i = 0; i < TGSI_NUM_CHANNELS; i++)
   1582             bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
   1583          break;
   1584 
   1585       case TGSI_FILE_PREDICATE:
   1586          assert(idx < LP_MAX_TGSI_PREDS);
   1587          for (i = 0; i < TGSI_NUM_CHANNELS; i++)
   1588             bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
   1589                                                  "predicate");
   1590          break;
   1591 
   1592       default:
   1593          /* don't need to declare other vars */
   1594          break;
   1595       }
   1596    }
   1597 }
   1598 
   1599 
   1600 void lp_emit_immediate_soa(
   1601    struct lp_build_tgsi_context *bld_base,
   1602    const struct tgsi_full_immediate *imm)
   1603 {
   1604    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   1605    struct gallivm_state * gallivm = bld_base->base.gallivm;
   1606 
   1607    /* simply copy the immediate values into the next immediates[] slot */
   1608    unsigned i;
   1609    const uint size = imm->Immediate.NrTokens - 1;
   1610    assert(size <= 4);
   1611    assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
   1612    switch (imm->Immediate.DataType) {
   1613    case TGSI_IMM_FLOAT32:
   1614       for( i = 0; i < size; ++i )
   1615          bld->immediates[bld->num_immediates][i] =
   1616             lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
   1617 
   1618       break;
   1619    case TGSI_IMM_UINT32:
   1620       for( i = 0; i < size; ++i ) {
   1621          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
   1622          bld->immediates[bld->num_immediates][i] =
   1623             LLVMConstBitCast(tmp, bld_base->base.vec_type);
   1624       }
   1625 
   1626       break;
   1627    case TGSI_IMM_INT32:
   1628       for( i = 0; i < size; ++i ) {
   1629          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
   1630          bld->immediates[bld->num_immediates][i] =
   1631             LLVMConstBitCast(tmp, bld_base->base.vec_type);
   1632       }
   1633 
   1634       break;
   1635    }
   1636    for( i = size; i < 4; ++i )
   1637       bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
   1638 
   1639    bld->num_immediates++;
   1640 }
   1641 
   1642 static void
   1643 ddx_emit(
   1644    const struct lp_build_tgsi_action * action,
   1645    struct lp_build_tgsi_context * bld_base,
   1646    struct lp_build_emit_data * emit_data)
   1647 {
   1648    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1649 
   1650    emit_fetch_deriv(bld, emit_data->args[0], NULL,
   1651                     &emit_data->output[emit_data->chan], NULL);
   1652 }
   1653 
   1654 static void
   1655 ddy_emit(
   1656    const struct lp_build_tgsi_action * action,
   1657    struct lp_build_tgsi_context * bld_base,
   1658    struct lp_build_emit_data * emit_data)
   1659 {
   1660    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1661 
   1662    emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
   1663                     &emit_data->output[emit_data->chan]);
   1664 }
   1665 
   1666 static void
   1667 kilp_emit(
   1668    const struct lp_build_tgsi_action * action,
   1669    struct lp_build_tgsi_context * bld_base,
   1670    struct lp_build_emit_data * emit_data)
   1671 {
   1672    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1673 
   1674    emit_kilp(bld, bld_base->pc - 1);
   1675 }
   1676 
   1677 static void
   1678 kil_emit(
   1679    const struct lp_build_tgsi_action * action,
   1680    struct lp_build_tgsi_context * bld_base,
   1681    struct lp_build_emit_data * emit_data)
   1682 {
   1683    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1684 
   1685    emit_kil(bld, emit_data->inst, bld_base->pc - 1);
   1686 }
   1687 
   1688 static void
   1689 tex_emit(
   1690    const struct lp_build_tgsi_action * action,
   1691    struct lp_build_tgsi_context * bld_base,
   1692    struct lp_build_emit_data * emit_data)
   1693 {
   1694    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1695 
   1696    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
   1697 }
   1698 
   1699 static void
   1700 txb_emit(
   1701    const struct lp_build_tgsi_action * action,
   1702    struct lp_build_tgsi_context * bld_base,
   1703    struct lp_build_emit_data * emit_data)
   1704 {
   1705    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1706 
   1707    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
   1708             emit_data->output);
   1709 }
   1710 
   1711 static void
   1712 txd_emit(
   1713    const struct lp_build_tgsi_action * action,
   1714    struct lp_build_tgsi_context * bld_base,
   1715    struct lp_build_emit_data * emit_data)
   1716 {
   1717    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1718 
   1719    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
   1720             emit_data->output);
   1721 }
   1722 
   1723 static void
   1724 txl_emit(
   1725    const struct lp_build_tgsi_action * action,
   1726    struct lp_build_tgsi_context * bld_base,
   1727    struct lp_build_emit_data * emit_data)
   1728 {
   1729    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1730 
   1731    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
   1732             emit_data->output);
   1733 }
   1734 
   1735 static void
   1736 txp_emit(
   1737    const struct lp_build_tgsi_action * action,
   1738    struct lp_build_tgsi_context * bld_base,
   1739    struct lp_build_emit_data * emit_data)
   1740 {
   1741    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1742 
   1743    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
   1744             emit_data->output);
   1745 }
   1746 
   1747 static void
   1748 txq_emit(
   1749    const struct lp_build_tgsi_action * action,
   1750    struct lp_build_tgsi_context * bld_base,
   1751    struct lp_build_emit_data * emit_data)
   1752 {
   1753    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1754 
   1755    emit_txq(bld, emit_data->inst, emit_data->output);
   1756 }
   1757 
   1758 static void
   1759 cal_emit(
   1760    const struct lp_build_tgsi_action * action,
   1761    struct lp_build_tgsi_context * bld_base,
   1762    struct lp_build_emit_data * emit_data)
   1763 {
   1764    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1765 
   1766    lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
   1767                      &bld_base->pc);
   1768 }
   1769 
   1770 static void
   1771 ret_emit(
   1772    const struct lp_build_tgsi_action * action,
   1773    struct lp_build_tgsi_context * bld_base,
   1774    struct lp_build_emit_data * emit_data)
   1775 {
   1776    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1777 
   1778    lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
   1779 }
   1780 
   1781 static void
   1782 brk_emit(
   1783    const struct lp_build_tgsi_action * action,
   1784    struct lp_build_tgsi_context * bld_base,
   1785    struct lp_build_emit_data * emit_data)
   1786 {
   1787    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1788 
   1789    lp_exec_break(&bld->exec_mask);
   1790 }
   1791 
   1792 static void
   1793 if_emit(
   1794    const struct lp_build_tgsi_action * action,
   1795    struct lp_build_tgsi_context * bld_base,
   1796    struct lp_build_emit_data * emit_data)
   1797 {
   1798    LLVMValueRef tmp;
   1799    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1800 
   1801    tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
   1802                       emit_data->args[0], bld->bld_base.base.zero);
   1803    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
   1804 }
   1805 
   1806 static void
   1807 bgnloop_emit(
   1808    const struct lp_build_tgsi_action * action,
   1809    struct lp_build_tgsi_context * bld_base,
   1810    struct lp_build_emit_data * emit_data)
   1811 {
   1812    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1813 
   1814    lp_exec_bgnloop(&bld->exec_mask);
   1815 }
   1816 
   1817 static void
   1818 bgnsub_emit(
   1819    const struct lp_build_tgsi_action * action,
   1820    struct lp_build_tgsi_context * bld_base,
   1821    struct lp_build_emit_data * emit_data)
   1822 {
   1823    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1824 
   1825    lp_exec_mask_bgnsub(&bld->exec_mask);
   1826 }
   1827 
   1828 static void
   1829 else_emit(
   1830    const struct lp_build_tgsi_action * action,
   1831    struct lp_build_tgsi_context * bld_base,
   1832    struct lp_build_emit_data * emit_data)
   1833 {
   1834    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1835 
   1836    lp_exec_mask_cond_invert(&bld->exec_mask);
   1837 }
   1838 
   1839 static void
   1840 endif_emit(
   1841    const struct lp_build_tgsi_action * action,
   1842    struct lp_build_tgsi_context * bld_base,
   1843    struct lp_build_emit_data * emit_data)
   1844 {
   1845    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1846 
   1847    lp_exec_mask_cond_pop(&bld->exec_mask);
   1848 }
   1849 
   1850 static void
   1851 endloop_emit(
   1852    const struct lp_build_tgsi_action * action,
   1853    struct lp_build_tgsi_context * bld_base,
   1854    struct lp_build_emit_data * emit_data)
   1855 {
   1856    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1857 
   1858    lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
   1859 }
   1860 
   1861 static void
   1862 endsub_emit(
   1863    const struct lp_build_tgsi_action * action,
   1864    struct lp_build_tgsi_context * bld_base,
   1865    struct lp_build_emit_data * emit_data)
   1866 {
   1867    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1868 
   1869    lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
   1870 }
   1871 
   1872 static void
   1873 cont_emit(
   1874    const struct lp_build_tgsi_action * action,
   1875    struct lp_build_tgsi_context * bld_base,
   1876    struct lp_build_emit_data * emit_data)
   1877 {
   1878    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1879 
   1880    lp_exec_continue(&bld->exec_mask);
   1881 }
   1882 
   1883 /* XXX: Refactor and move it to lp_bld_tgsi_action.c
   1884  *
   1885  * XXX: What do the comments about xmm registers mean?  Maybe they are left over
   1886  * from old code, but there is no garauntee that LLVM will use those registers
   1887  * for this code.
   1888  *
   1889  * XXX: There should be no calls to lp_build_emit_fetch in this function.  This
   1890  * should be handled by the emit_data->fetch_args function. */
   1891 static void
   1892 nrm_emit(
   1893    const struct lp_build_tgsi_action * action,
   1894    struct lp_build_tgsi_context * bld_base,
   1895    struct lp_build_emit_data * emit_data)
   1896 {
   1897    LLVMValueRef tmp0, tmp1;
   1898    LLVMValueRef tmp4 = NULL;
   1899    LLVMValueRef tmp5 = NULL;
   1900    LLVMValueRef tmp6 = NULL;
   1901    LLVMValueRef tmp7 = NULL;
   1902    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1903 
   1904    uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
   1905 
   1906   if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
   1907       TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
   1908       TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
   1909       (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
   1910 
   1911       /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
   1912 
   1913       /* xmm4 = src.x */
   1914       /* xmm0 = src.x * src.x */
   1915       tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
   1916       if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
   1917          tmp4 = tmp0;
   1918       }
   1919       tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
   1920 
   1921       /* xmm5 = src.y */
   1922       /* xmm0 = xmm0 + src.y * src.y */
   1923       tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
   1924       if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
   1925          tmp5 = tmp1;
   1926       }
   1927       tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
   1928       tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
   1929 
   1930       /* xmm6 = src.z */
   1931       /* xmm0 = xmm0 + src.z * src.z */
   1932       tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
   1933       if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
   1934          tmp6 = tmp1;
   1935       }
   1936       tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
   1937       tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
   1938 
   1939       if (dims == 4) {
   1940          /* xmm7 = src.w */
   1941          /* xmm0 = xmm0 + src.w * src.w */
   1942          tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
   1943          if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
   1944             tmp7 = tmp1;
   1945          }
   1946          tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
   1947          tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
   1948       }
   1949       /* xmm1 = 1 / sqrt(xmm0) */
   1950       tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
   1951        /* dst.x = xmm1 * src.x */
   1952       if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
   1953          emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
   1954       }
   1955       /* dst.y = xmm1 * src.y */
   1956       if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
   1957          emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
   1958       }
   1959 
   1960       /* dst.z = xmm1 * src.z */
   1961       if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
   1962          emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
   1963       }
   1964       /* dst.w = xmm1 * src.w */
   1965       if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) {
   1966          emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
   1967       }
   1968    }
   1969 
   1970    /* dst.w = 1.0 */
   1971    if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
   1972        emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
   1973    }
   1974 }
   1975 
   1976 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
   1977 {
   1978    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   1979    struct gallivm_state * gallivm = bld_base->base.gallivm;
   1980 
   1981    if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
   1982       LLVMValueRef array_size =
   1983          lp_build_const_int32(gallivm,
   1984                          bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
   1985       bld->temps_array = lp_build_array_alloca(gallivm,
   1986                                               bld_base->base.vec_type, array_size,
   1987                                               "temp_array");
   1988    }
   1989 
   1990    if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
   1991       LLVMValueRef array_size =
   1992          lp_build_const_int32(gallivm,
   1993                             bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
   1994       bld->outputs_array = lp_build_array_alloca(gallivm,
   1995                                                 bld_base->base.vec_type, array_size,
   1996                                                 "output_array");
   1997    }
   1998 
   1999    /* If we have indirect addressing in inputs we need to copy them into
   2000     * our alloca array to be able to iterate over them */
   2001    if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
   2002       unsigned index, chan;
   2003       LLVMTypeRef vec_type = bld_base->base.vec_type;
   2004       LLVMValueRef array_size = lp_build_const_int32(gallivm,
   2005             bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
   2006       bld->inputs_array = lp_build_array_alloca(gallivm,
   2007                                                vec_type, array_size,
   2008                                                "input_array");
   2009 
   2010       assert(bld_base->info->num_inputs
   2011                         <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
   2012 
   2013       for (index = 0; index < bld_base->info->num_inputs; ++index) {
   2014          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
   2015             LLVMValueRef lindex =
   2016                lp_build_const_int32(gallivm, index * 4 + chan);
   2017             LLVMValueRef input_ptr =
   2018                LLVMBuildGEP(gallivm->builder, bld->inputs_array,
   2019                             &lindex, 1, "");
   2020             LLVMValueRef value = bld->inputs[index][chan];
   2021             if (value)
   2022                LLVMBuildStore(gallivm->builder, value, input_ptr);
   2023          }
   2024       }
   2025    }
   2026 }
   2027 
   2028 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
   2029 {
   2030    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   2031 
   2032    if (0) {
   2033       /* for debugging */
   2034       emit_dump_temps(bld);
   2035    }
   2036 
   2037    /* If we have indirect addressing in outputs we need to copy our alloca array
   2038     * to the outputs slots specified by the called */
   2039    if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
   2040       unsigned index, chan;
   2041       assert(bld_base->info->num_outputs <=
   2042                         bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1);
   2043       for (index = 0; index < bld_base->info->num_outputs; ++index) {
   2044          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
   2045             bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
   2046          }
   2047       }
   2048    }
   2049 }
   2050 
   2051 void
   2052 lp_build_tgsi_soa(struct gallivm_state *gallivm,
   2053                   const struct tgsi_token *tokens,
   2054                   struct lp_type type,
   2055                   struct lp_build_mask_context *mask,
   2056                   LLVMValueRef consts_ptr,
   2057                   const struct lp_bld_tgsi_system_values *system_values,
   2058                   const LLVMValueRef *pos,
   2059                   const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
   2060                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
   2061                   struct lp_build_sampler_soa *sampler,
   2062                   const struct tgsi_shader_info *info)
   2063 {
   2064    struct lp_build_tgsi_soa_context bld;
   2065 
   2066    struct lp_type res_type;
   2067 
   2068    assert(type.length <= LP_MAX_VECTOR_LENGTH);
   2069    memset(&res_type, 0, sizeof res_type);
   2070    res_type.width = type.width;
   2071    res_type.length = type.length;
   2072    res_type.sign = 1;
   2073 
   2074    /* Setup build context */
   2075    memset(&bld, 0, sizeof bld);
   2076    lp_build_context_init(&bld.bld_base.base, gallivm, type);
   2077    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   2078    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   2079    lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   2080    bld.mask = mask;
   2081    bld.pos = pos;
   2082    bld.inputs = inputs;
   2083    bld.outputs = outputs;
   2084    bld.consts_ptr = consts_ptr;
   2085    bld.sampler = sampler;
   2086    bld.bld_base.info = info;
   2087    bld.indirect_files = info->indirect_files;
   2088 
   2089    bld.bld_base.soa = TRUE;
   2090    bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   2091    bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   2092    bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   2093    bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   2094    bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   2095    bld.bld_base.emit_store = emit_store;
   2096 
   2097    bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   2098    bld.bld_base.emit_immediate = lp_emit_immediate_soa;
   2099 
   2100    bld.bld_base.emit_prologue = emit_prologue;
   2101    bld.bld_base.emit_epilogue = emit_epilogue;
   2102 
   2103    /* Set opcode actions */
   2104    lp_set_default_actions_cpu(&bld.bld_base);
   2105 
   2106    bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   2107    bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   2108    bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   2109    bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   2110    bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   2111    bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   2112    bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   2113    bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   2114    bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   2115    bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   2116    bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   2117    bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   2118    bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
   2119    bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
   2120    bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
   2121    bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
   2122    bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   2123    bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   2124    bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   2125    bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   2126    bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   2127    bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   2128    bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   2129 
   2130    lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
   2131 
   2132    bld.system_values = *system_values;
   2133 
   2134    lp_build_tgsi_llvm(&bld.bld_base, tokens);
   2135 
   2136    if (0) {
   2137       LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
   2138       LLVMValueRef function = LLVMGetBasicBlockParent(block);
   2139       debug_printf("11111111111111111111111111111 \n");
   2140       tgsi_dump(tokens, 0);
   2141       lp_debug_dump_value(function);
   2142       debug_printf("2222222222222222222222222222 \n");
   2143    }
   2144 
   2145    if (0) {
   2146       LLVMModuleRef module = LLVMGetGlobalParent(
   2147          LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
   2148       LLVMDumpModule(module);
   2149 
   2150    }
   2151 }
   2152