Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2010 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 /**
     29  * @file
     30  * TGSI to LLVM IR translation -- AoS.
     31  *
     32  * FIXME:
     33  * - No control flow support: the existing control flow code should be factored
     34  * out from the SoA code into a common module and shared.
     35  * - No derivatives. Derivative logic should be pluggable, just like the samplers.
     36  *
     37  * @author Jose Fonseca <jfonseca (at) vmware.com>
     38  */
     39 
     40 #include "pipe/p_config.h"
     41 #include "pipe/p_shader_tokens.h"
     42 #include "util/u_debug.h"
     43 #include "util/u_math.h"
     44 #include "util/u_memory.h"
     45 #include "tgsi/tgsi_dump.h"
     46 #include "tgsi/tgsi_info.h"
     47 #include "tgsi/tgsi_parse.h"
     48 #include "tgsi/tgsi_util.h"
     49 #include "tgsi/tgsi_scan.h"
     50 #include "lp_bld_type.h"
     51 #include "lp_bld_const.h"
     52 #include "lp_bld_arit.h"
     53 #include "lp_bld_logic.h"
     54 #include "lp_bld_swizzle.h"
     55 #include "lp_bld_flow.h"
     56 #include "lp_bld_quad.h"
     57 #include "lp_bld_tgsi.h"
     58 #include "lp_bld_debug.h"
     59 #include "lp_bld_sample.h"
     60 
     61 
     62 /**
     63  * Wrapper around lp_build_swizzle_aos which translates swizzles to another
     64  * ordering.
     65  */
     66 static LLVMValueRef
     67 swizzle_aos(struct lp_build_tgsi_context *bld_base,
     68             LLVMValueRef a,
     69             unsigned swizzle_x,
     70             unsigned swizzle_y,
     71             unsigned swizzle_z,
     72             unsigned swizzle_w)
     73 {
     74    unsigned char swizzles[4];
     75    struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
     76 
     77    assert(swizzle_x < 4);
     78    assert(swizzle_y < 4);
     79    assert(swizzle_z < 4);
     80    assert(swizzle_w < 4);
     81 
     82    swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
     83    swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
     84    swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
     85    swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
     86 
     87    return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
     88 }
     89 
     90 
     91 static LLVMValueRef
     92 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
     93                    LLVMValueRef a,
     94                    unsigned chan)
     95 {
     96    chan = bld->swizzles[chan];
     97    return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan);
     98 }
     99 
    100 
    101 static LLVMValueRef
    102 emit_fetch_constant(
    103    struct lp_build_tgsi_context * bld_base,
    104    const struct tgsi_full_src_register * reg,
    105    enum tgsi_opcode_type stype,
    106    unsigned swizzle)
    107 {
    108    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
    109    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
    110    struct lp_type type = bld_base->base.type;
    111    LLVMValueRef res;
    112    unsigned chan;
    113 
    114    assert(!reg->Register.Indirect);
    115 
    116    /*
    117     * Get the constants components
    118     */
    119 
    120    res = bld->bld_base.base.undef;
    121    for (chan = 0; chan < 4; ++chan) {
    122       LLVMValueRef index;
    123       LLVMValueRef scalar_ptr;
    124       LLVMValueRef scalar;
    125       LLVMValueRef swizzle;
    126 
    127       index = lp_build_const_int32(bld->bld_base.base.gallivm,
    128                                    reg->Register.Index * 4 + chan);
    129 
    130       scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
    131 
    132       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
    133 
    134       lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
    135 
    136       /*
    137        * NOTE: constants array is always assumed to be RGBA
    138        */
    139 
    140       swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
    141                                      bld->swizzles[chan]);
    142 
    143       res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
    144    }
    145 
    146    /*
    147     * Broadcast the first quaternion to all others.
    148     *
    149     * XXX: could be factored into a reusable function.
    150     */
    151 
    152    if (type.length > 4) {
    153       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
    154       unsigned i;
    155 
    156       for (chan = 0; chan < 4; ++chan) {
    157          shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
    158       }
    159 
    160       for (i = 4; i < type.length; ++i) {
    161          shuffles[i] = shuffles[i % 4];
    162       }
    163 
    164       res = LLVMBuildShuffleVector(builder,
    165                                    res, bld->bld_base.base.undef,
    166                                    LLVMConstVector(shuffles, type.length),
    167                                    "");
    168    }
    169    return res;
    170 }
    171 
    172 static LLVMValueRef
    173 emit_fetch_immediate(
    174    struct lp_build_tgsi_context * bld_base,
    175    const struct tgsi_full_src_register * reg,
    176    enum tgsi_opcode_type stype,
    177    unsigned swizzle)
    178 {
    179    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
    180    LLVMValueRef res = bld->immediates[reg->Register.Index];
    181    assert(res);
    182    return res;
    183 }
    184 
    185 static LLVMValueRef
    186 emit_fetch_input(
    187    struct lp_build_tgsi_context * bld_base,
    188    const struct tgsi_full_src_register * reg,
    189    enum tgsi_opcode_type stype,
    190    unsigned swizzle)
    191 {
    192    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
    193    LLVMValueRef res = bld->inputs[reg->Register.Index];
    194    assert(!reg->Register.Indirect);
    195    assert(res);
    196    return res;
    197 }
    198 
    199 static LLVMValueRef
    200 emit_fetch_temporary(
    201    struct lp_build_tgsi_context * bld_base,
    202    const struct tgsi_full_src_register * reg,
    203    enum tgsi_opcode_type stype,
    204    unsigned swizzle)
    205 {
    206    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
    207    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
    208    LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
    209    LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
    210    assert(!reg->Register.Indirect);
    211    if (!res)
    212       return bld->bld_base.base.undef;
    213 
    214    return res;
    215 }
    216 
    217 /**
    218  * Register store.
    219  */
    220 void
    221 lp_emit_store_aos(
    222    struct lp_build_tgsi_aos_context *bld,
    223    const struct tgsi_full_instruction *inst,
    224    unsigned index,
    225    LLVMValueRef value)
    226 {
    227    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    228    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
    229    LLVMValueRef mask = NULL;
    230    LLVMValueRef ptr;
    231 
    232    /*
    233     * Saturate the value
    234     */
    235 
    236    switch (inst->Instruction.Saturate) {
    237    case TGSI_SAT_NONE:
    238       break;
    239 
    240    case TGSI_SAT_ZERO_ONE:
    241       value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
    242       value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
    243       break;
    244 
    245    case TGSI_SAT_MINUS_PLUS_ONE:
    246       value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
    247       value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
    248       break;
    249 
    250    default:
    251       assert(0);
    252    }
    253 
    254    /*
    255     * Translate the register file
    256     */
    257 
    258    assert(!reg->Register.Indirect);
    259 
    260    switch (reg->Register.File) {
    261    case TGSI_FILE_OUTPUT:
    262       ptr = bld->outputs[reg->Register.Index];
    263       break;
    264 
    265    case TGSI_FILE_TEMPORARY:
    266       ptr = bld->temps[reg->Register.Index];
    267       break;
    268 
    269    case TGSI_FILE_ADDRESS:
    270       ptr = bld->addr[reg->Indirect.Index];
    271       break;
    272 
    273    case TGSI_FILE_PREDICATE:
    274       ptr = bld->preds[reg->Register.Index];
    275       break;
    276 
    277    default:
    278       assert(0);
    279       return;
    280    }
    281 
    282    if (!ptr)
    283       return;
    284    /*
    285     * Predicate
    286     */
    287 
    288    if (inst->Instruction.Predicate) {
    289       LLVMValueRef pred;
    290 
    291       assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
    292 
    293       pred = LLVMBuildLoad(builder,
    294                            bld->preds[inst->Predicate.Index], "");
    295 
    296       /*
    297        * Convert the value to an integer mask.
    298        */
    299       pred = lp_build_compare(bld->bld_base.base.gallivm,
    300                                bld->bld_base.base.type,
    301                                PIPE_FUNC_NOTEQUAL,
    302                                pred,
    303                                bld->bld_base.base.zero);
    304 
    305       if (inst->Predicate.Negate) {
    306          pred = LLVMBuildNot(builder, pred, "");
    307       }
    308 
    309       pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
    310                          inst->Predicate.SwizzleX,
    311                          inst->Predicate.SwizzleY,
    312                          inst->Predicate.SwizzleZ,
    313                          inst->Predicate.SwizzleW);
    314 
    315       if (mask) {
    316          mask = LLVMBuildAnd(builder, mask, pred, "");
    317       } else {
    318          mask = pred;
    319       }
    320    }
    321 
    322    /*
    323     * Writemask
    324     */
    325 
    326    if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
    327       LLVMValueRef writemask;
    328 
    329       writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
    330                                                    bld->bld_base.base.type,
    331                                                    reg->Register.WriteMask,
    332                                                    bld->swizzles);
    333 
    334       if (mask) {
    335          mask = LLVMBuildAnd(builder, mask, writemask, "");
    336       } else {
    337          mask = writemask;
    338       }
    339    }
    340 
    341    if (mask) {
    342       LLVMValueRef orig_value;
    343 
    344       orig_value = LLVMBuildLoad(builder, ptr, "");
    345       value = lp_build_select(&bld->bld_base.base,
    346                               mask, value, orig_value);
    347    }
    348 
    349    LLVMBuildStore(builder, value, ptr);
    350 }
    351 
    352 
    353 /**
    354  * High-level instruction translators.
    355  */
    356 
    357 static LLVMValueRef
    358 emit_tex(struct lp_build_tgsi_aos_context *bld,
    359          const struct tgsi_full_instruction *inst,
    360          enum lp_build_tex_modifier modifier)
    361 {
    362    unsigned target;
    363    unsigned unit;
    364    LLVMValueRef coords;
    365    LLVMValueRef ddx;
    366    LLVMValueRef ddy;
    367    struct lp_derivatives derivs;
    368 
    369    if (!bld->sampler) {
    370       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
    371       return bld->bld_base.base.undef;
    372    }
    373 
    374    target = inst->Texture.Texture;
    375 
    376    coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
    377 
    378    if (0 && modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
    379       ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
    380       ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
    381       unit = inst->Src[3].Register.Index;
    382    }  else {
    383 #if 0
    384       ddx = lp_build_ddx( &bld->bld_base.base, coords );
    385       ddy = lp_build_ddy( &bld->bld_base.base, coords );
    386 #else
    387       /* TODO */
    388       derivs.ddx_ddy[0] = bld->bld_base.base.one;
    389       derivs.ddx_ddy[1] = bld->bld_base.base.one;
    390 #endif
    391       unit = inst->Src[1].Register.Index;
    392    }
    393 
    394    return bld->sampler->emit_fetch_texel(bld->sampler,
    395                                          &bld->bld_base.base,
    396                                          target, unit,
    397                                          coords, derivs,
    398                                          modifier);
    399 }
    400 
    401 
    402 void
    403 lp_emit_declaration_aos(
    404    struct lp_build_tgsi_aos_context *bld,
    405    const struct tgsi_full_declaration *decl)
    406 {
    407    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    408    LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
    409 
    410    unsigned first = decl->Range.First;
    411    unsigned last = decl->Range.Last;
    412    unsigned idx;
    413 
    414    for (idx = first; idx <= last; ++idx) {
    415       switch (decl->Declaration.File) {
    416       case TGSI_FILE_TEMPORARY:
    417          assert(idx < LP_MAX_TGSI_TEMPS);
    418          if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
    419             LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
    420             bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
    421                                                      vec_type, array_size, "");
    422          } else {
    423             bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
    424          }
    425          break;
    426 
    427       case TGSI_FILE_OUTPUT:
    428          bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
    429          break;
    430 
    431       case TGSI_FILE_ADDRESS:
    432          assert(idx < LP_MAX_TGSI_ADDRS);
    433          bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
    434          break;
    435 
    436       case TGSI_FILE_PREDICATE:
    437          assert(idx < LP_MAX_TGSI_PREDS);
    438          bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
    439          break;
    440 
    441       default:
    442          /* don't need to declare other vars */
    443          break;
    444       }
    445    }
    446 }
    447 
    448 
    449 /**
    450  * Emit LLVM for one TGSI instruction.
    451  * \return TRUE for success, FALSE otherwise
    452  */
    453 boolean
    454 lp_emit_instruction_aos(
    455    struct lp_build_tgsi_aos_context *bld,
    456    const struct tgsi_full_instruction *inst,
    457    const struct tgsi_opcode_info *info,
    458    int *pc)
    459 {
    460    LLVMValueRef src0, src1, src2;
    461    LLVMValueRef tmp0, tmp1;
    462    LLVMValueRef dst0 = NULL;
    463 
    464    /*
    465     * Stores and write masks are handled in a general fashion after the long
    466     * instruction opcode switch statement.
    467     *
    468     * Although not strictly necessary, we avoid generating instructions for
    469     * channels which won't be stored, in cases where that's easy. For some
    470     * complex instructions, like texture sampling, it is more convenient to
    471     * assume a full writemask and then let LLVM optimization passes eliminate
    472     * redundant code.
    473     */
    474 
    475    (*pc)++;
    476 
    477    assert(info->num_dst <= 1);
    478    if (info->num_dst) {
    479       dst0 = bld->bld_base.base.undef;
    480    }
    481 
    482    switch (inst->Instruction.Opcode) {
    483    case TGSI_OPCODE_ARL:
    484       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    485       dst0 = lp_build_floor(&bld->bld_base.base, src0);
    486       break;
    487 
    488    case TGSI_OPCODE_MOV:
    489       dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    490       break;
    491 
    492    case TGSI_OPCODE_LIT:
    493       return FALSE;
    494 
    495    case TGSI_OPCODE_RCP:
    496    /* TGSI_OPCODE_RECIP */
    497       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    498       dst0 = lp_build_rcp(&bld->bld_base.base, src0);
    499       break;
    500 
    501    case TGSI_OPCODE_RSQ:
    502    /* TGSI_OPCODE_RECIPSQRT */
    503       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    504       tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
    505       dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
    506       break;
    507 
    508    case TGSI_OPCODE_EXP:
    509       return FALSE;
    510 
    511    case TGSI_OPCODE_LOG:
    512       return FALSE;
    513 
    514    case TGSI_OPCODE_MUL:
    515       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    516       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    517       dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
    518       break;
    519 
    520    case TGSI_OPCODE_ADD:
    521       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    522       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    523       dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
    524       break;
    525 
    526    case TGSI_OPCODE_DP3:
    527    /* TGSI_OPCODE_DOT3 */
    528       return FALSE;
    529 
    530    case TGSI_OPCODE_DP4:
    531    /* TGSI_OPCODE_DOT4 */
    532       return FALSE;
    533 
    534    case TGSI_OPCODE_DST:
    535       return FALSE;
    536 
    537    case TGSI_OPCODE_MIN:
    538       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    539       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    540       dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
    541       break;
    542 
    543    case TGSI_OPCODE_MAX:
    544       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    545       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    546       dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
    547       break;
    548 
    549    case TGSI_OPCODE_SLT:
    550    /* TGSI_OPCODE_SETLT */
    551       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    552       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    553       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
    554       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    555       break;
    556 
    557    case TGSI_OPCODE_SGE:
    558    /* TGSI_OPCODE_SETGE */
    559       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    560       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    561       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
    562       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    563       break;
    564 
    565    case TGSI_OPCODE_MAD:
    566    /* TGSI_OPCODE_MADD */
    567       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    568       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    569       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
    570       tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
    571       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
    572       break;
    573 
    574    case TGSI_OPCODE_SUB:
    575       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    576       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    577       dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
    578       break;
    579 
    580    case TGSI_OPCODE_LRP:
    581       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    582       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    583       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
    584       tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
    585       tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
    586       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
    587       break;
    588 
    589    case TGSI_OPCODE_CND:
    590       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    591       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    592       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
    593       tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5);
    594       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1);
    595       dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
    596       break;
    597 
    598    case TGSI_OPCODE_DP2A:
    599       return FALSE;
    600 
    601    case TGSI_OPCODE_FRC:
    602       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    603       tmp0 = lp_build_floor(&bld->bld_base.base, src0);
    604       dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
    605       break;
    606 
    607    case TGSI_OPCODE_CLAMP:
    608       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    609       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    610       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
    611       tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
    612       dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
    613       break;
    614 
    615    case TGSI_OPCODE_FLR:
    616       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    617       dst0 = lp_build_floor(&bld->bld_base.base, src0);
    618       break;
    619 
    620    case TGSI_OPCODE_ROUND:
    621       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    622       dst0 = lp_build_round(&bld->bld_base.base, src0);
    623       break;
    624 
    625    case TGSI_OPCODE_EX2:
    626       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    627       tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X);
    628       dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
    629       break;
    630 
    631    case TGSI_OPCODE_LG2:
    632       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    633       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
    634       dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
    635       break;
    636 
    637    case TGSI_OPCODE_POW:
    638       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    639       src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
    640       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    641       src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
    642       dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
    643       break;
    644 
    645    case TGSI_OPCODE_XPD:
    646       return FALSE;
    647 
    648    case TGSI_OPCODE_RCC:
    649       /* deprecated? */
    650       assert(0);
    651       return FALSE;
    652 
    653    case TGSI_OPCODE_DPH:
    654       return FALSE;
    655 
    656    case TGSI_OPCODE_COS:
    657       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    658       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
    659       dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
    660       break;
    661 
    662    case TGSI_OPCODE_DDX:
    663       return FALSE;
    664 
    665    case TGSI_OPCODE_DDY:
    666       return FALSE;
    667 
    668    case TGSI_OPCODE_KILP:
    669       /* predicated kill */
    670       return FALSE;
    671 
    672    case TGSI_OPCODE_KIL:
    673       /* conditional kill */
    674       return FALSE;
    675 
    676    case TGSI_OPCODE_PK2H:
    677       return FALSE;
    678       break;
    679 
    680    case TGSI_OPCODE_PK2US:
    681       return FALSE;
    682       break;
    683 
    684    case TGSI_OPCODE_PK4B:
    685       return FALSE;
    686       break;
    687 
    688    case TGSI_OPCODE_PK4UB:
    689       return FALSE;
    690 
    691    case TGSI_OPCODE_RFL:
    692       return FALSE;
    693 
    694    case TGSI_OPCODE_SEQ:
    695       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    696       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    697       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
    698       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    699       break;
    700 
    701    case TGSI_OPCODE_SFL:
    702       dst0 = bld->bld_base.base.zero;
    703       break;
    704 
    705    case TGSI_OPCODE_SGT:
    706       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    707       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    708       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
    709       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    710       break;
    711 
    712    case TGSI_OPCODE_SIN:
    713       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    714       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
    715       dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
    716       break;
    717 
    718    case TGSI_OPCODE_SLE:
    719       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    720       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    721       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
    722       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    723       break;
    724 
    725    case TGSI_OPCODE_SNE:
    726       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    727       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    728       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
    729       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    730       break;
    731 
    732    case TGSI_OPCODE_STR:
    733       dst0 = bld->bld_base.base.one;
    734       break;
    735 
    736    case TGSI_OPCODE_TEX:
    737       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
    738       break;
    739 
    740    case TGSI_OPCODE_TXD:
    741       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
    742       break;
    743 
    744    case TGSI_OPCODE_UP2H:
    745       /* deprecated */
    746       assert (0);
    747       return FALSE;
    748       break;
    749 
    750    case TGSI_OPCODE_UP2US:
    751       /* deprecated */
    752       assert(0);
    753       return FALSE;
    754       break;
    755 
    756    case TGSI_OPCODE_UP4B:
    757       /* deprecated */
    758       assert(0);
    759       return FALSE;
    760       break;
    761 
    762    case TGSI_OPCODE_UP4UB:
    763       /* deprecated */
    764       assert(0);
    765       return FALSE;
    766       break;
    767 
    768    case TGSI_OPCODE_X2D:
    769       /* deprecated? */
    770       assert(0);
    771       return FALSE;
    772       break;
    773 
    774    case TGSI_OPCODE_ARA:
    775       /* deprecated */
    776       assert(0);
    777       return FALSE;
    778       break;
    779 
    780    case TGSI_OPCODE_ARR:
    781       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    782       dst0 = lp_build_round(&bld->bld_base.base, src0);
    783       break;
    784 
    785    case TGSI_OPCODE_BRA:
    786       /* deprecated */
    787       assert(0);
    788       return FALSE;
    789       break;
    790 
    791    case TGSI_OPCODE_CAL:
    792       return FALSE;
    793 
    794    case TGSI_OPCODE_RET:
    795       return FALSE;
    796 
    797    case TGSI_OPCODE_END:
    798       *pc = -1;
    799       break;
    800 
    801    case TGSI_OPCODE_SSG:
    802    /* TGSI_OPCODE_SGN */
    803       tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    804       dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
    805       break;
    806 
    807    case TGSI_OPCODE_CMP:
    808       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    809       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    810       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
    811       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
    812       dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
    813       break;
    814 
    815    case TGSI_OPCODE_SCS:
    816       return FALSE;
    817 
    818    case TGSI_OPCODE_TXB:
    819       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
    820       break;
    821 
    822    case TGSI_OPCODE_NRM:
    823       /* fall-through */
    824    case TGSI_OPCODE_NRM4:
    825       return FALSE;
    826 
    827    case TGSI_OPCODE_DIV:
    828       /* deprecated */
    829       assert(0);
    830       return FALSE;
    831       break;
    832 
    833    case TGSI_OPCODE_DP2:
    834       return FALSE;
    835 
    836    case TGSI_OPCODE_TXL:
    837       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
    838       break;
    839 
    840    case TGSI_OPCODE_TXP:
    841       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
    842       break;
    843 
    844    case TGSI_OPCODE_BRK:
    845       return FALSE;
    846 
    847    case TGSI_OPCODE_IF:
    848       return FALSE;
    849 
    850    case TGSI_OPCODE_BGNLOOP:
    851       return FALSE;
    852 
    853    case TGSI_OPCODE_BGNSUB:
    854       return FALSE;
    855 
    856    case TGSI_OPCODE_ELSE:
    857       return FALSE;
    858 
    859    case TGSI_OPCODE_ENDIF:
    860       return FALSE;
    861 
    862    case TGSI_OPCODE_ENDLOOP:
    863       return FALSE;
    864 
    865    case TGSI_OPCODE_ENDSUB:
    866       return FALSE;
    867 
    868    case TGSI_OPCODE_PUSHA:
    869       /* deprecated? */
    870       assert(0);
    871       return FALSE;
    872       break;
    873 
    874    case TGSI_OPCODE_POPA:
    875       /* deprecated? */
    876       assert(0);
    877       return FALSE;
    878       break;
    879 
    880    case TGSI_OPCODE_CEIL:
    881       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    882       dst0 = lp_build_ceil(&bld->bld_base.base, src0);
    883       break;
    884 
    885    case TGSI_OPCODE_I2F:
    886       /* deprecated? */
    887       assert(0);
    888       return FALSE;
    889       break;
    890 
    891    case TGSI_OPCODE_NOT:
    892       /* deprecated? */
    893       assert(0);
    894       return FALSE;
    895       break;
    896 
    897    case TGSI_OPCODE_TRUNC:
    898       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    899       dst0 = lp_build_trunc(&bld->bld_base.base, src0);
    900       break;
    901 
    902    case TGSI_OPCODE_SHL:
    903       /* deprecated? */
    904       assert(0);
    905       return FALSE;
    906       break;
    907 
    908    case TGSI_OPCODE_ISHR:
    909       /* deprecated? */
    910       assert(0);
    911       return FALSE;
    912       break;
    913 
    914    case TGSI_OPCODE_AND:
    915       /* deprecated? */
    916       assert(0);
    917       return FALSE;
    918       break;
    919 
    920    case TGSI_OPCODE_OR:
    921       /* deprecated? */
    922       assert(0);
    923       return FALSE;
    924       break;
    925 
    926    case TGSI_OPCODE_MOD:
    927       /* deprecated? */
    928       assert(0);
    929       return FALSE;
    930       break;
    931 
    932    case TGSI_OPCODE_XOR:
    933       /* deprecated? */
    934       assert(0);
    935       return FALSE;
    936       break;
    937 
    938    case TGSI_OPCODE_SAD:
    939       /* deprecated? */
    940       assert(0);
    941       return FALSE;
    942       break;
    943 
    944    case TGSI_OPCODE_TXF:
    945       /* deprecated? */
    946       assert(0);
    947       return FALSE;
    948       break;
    949 
    950    case TGSI_OPCODE_TXQ:
    951       /* deprecated? */
    952       assert(0);
    953       return FALSE;
    954       break;
    955 
    956    case TGSI_OPCODE_CONT:
    957       return FALSE;
    958 
    959    case TGSI_OPCODE_EMIT:
    960       return FALSE;
    961       break;
    962 
    963    case TGSI_OPCODE_ENDPRIM:
    964       return FALSE;
    965       break;
    966 
    967    case TGSI_OPCODE_NOP:
    968       break;
    969 
    970    default:
    971       return FALSE;
    972    }
    973 
    974    if (info->num_dst) {
    975       lp_emit_store_aos(bld, inst, 0, dst0);
    976    }
    977 
    978    return TRUE;
    979 }
    980 
    981 
    982 void
    983 lp_build_tgsi_aos(struct gallivm_state *gallivm,
    984                   const struct tgsi_token *tokens,
    985                   struct lp_type type,
    986                   const unsigned char swizzles[4],
    987                   LLVMValueRef consts_ptr,
    988                   const LLVMValueRef *inputs,
    989                   LLVMValueRef *outputs,
    990                   struct lp_build_sampler_aos *sampler,
    991                   const struct tgsi_shader_info *info)
    992 {
    993    struct lp_build_tgsi_aos_context bld;
    994    struct tgsi_parse_context parse;
    995    uint num_immediates = 0;
    996    unsigned chan;
    997    int pc = 0;
    998 
    999    /* Setup build context */
   1000    memset(&bld, 0, sizeof bld);
   1001    lp_build_context_init(&bld.bld_base.base, gallivm, type);
   1002    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   1003    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   1004    lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
   1005 
   1006    for (chan = 0; chan < 4; ++chan) {
   1007       bld.swizzles[chan] = swizzles[chan];
   1008       bld.inv_swizzles[swizzles[chan]] = chan;
   1009    }
   1010 
   1011    bld.inputs = inputs;
   1012    bld.outputs = outputs;
   1013    bld.consts_ptr = consts_ptr;
   1014    bld.sampler = sampler;
   1015    bld.indirect_files = info->indirect_files;
   1016    bld.bld_base.emit_swizzle = swizzle_aos;
   1017    bld.bld_base.info = info;
   1018 
   1019    bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   1020    bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   1021    bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   1022    bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   1023 
   1024    /* Set opcode actions */
   1025    lp_set_default_actions_cpu(&bld.bld_base);
   1026 
   1027    if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
   1028       return;
   1029    }
   1030 
   1031    tgsi_parse_init(&parse, tokens);
   1032 
   1033    while (!tgsi_parse_end_of_tokens(&parse)) {
   1034       tgsi_parse_token(&parse);
   1035 
   1036       switch(parse.FullToken.Token.Type) {
   1037       case TGSI_TOKEN_TYPE_DECLARATION:
   1038          /* Inputs already interpolated */
   1039          lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
   1040          break;
   1041 
   1042       case TGSI_TOKEN_TYPE_INSTRUCTION:
   1043          /* save expanded instruction */
   1044          lp_bld_tgsi_add_instruction(&bld.bld_base,
   1045                                      &parse.FullToken.FullInstruction);
   1046          break;
   1047 
   1048       case TGSI_TOKEN_TYPE_IMMEDIATE:
   1049          /* simply copy the immediate values into the next immediates[] slot */
   1050          {
   1051             const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
   1052             float imm[4];
   1053             assert(size <= 4);
   1054             assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
   1055             for (chan = 0; chan < 4; ++chan) {
   1056                imm[chan] = 0.0f;
   1057             }
   1058             for (chan = 0; chan < size; ++chan) {
   1059                unsigned swizzle = bld.swizzles[chan];
   1060                imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
   1061             }
   1062             bld.immediates[num_immediates] =
   1063                      lp_build_const_aos(gallivm, type,
   1064                                         imm[0], imm[1], imm[2], imm[3],
   1065                                         NULL);
   1066             num_immediates++;
   1067          }
   1068          break;
   1069 
   1070       case TGSI_TOKEN_TYPE_PROPERTY:
   1071          break;
   1072 
   1073       default:
   1074          assert(0);
   1075       }
   1076    }
   1077 
   1078    while (pc != -1) {
   1079       struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
   1080       const struct tgsi_opcode_info *opcode_info =
   1081          tgsi_get_opcode_info(instr->Instruction.Opcode);
   1082       if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
   1083          _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
   1084                        opcode_info->mnemonic);
   1085    }
   1086 
   1087    if (0) {
   1088       LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
   1089       LLVMValueRef function = LLVMGetBasicBlockParent(block);
   1090       debug_printf("11111111111111111111111111111 \n");
   1091       tgsi_dump(tokens, 0);
   1092       lp_debug_dump_value(function);
   1093       debug_printf("2222222222222222222222222222 \n");
   1094    }
   1095    tgsi_parse_free(&parse);
   1096    FREE(bld.bld_base.instructions);
   1097 
   1098    if (0) {
   1099       LLVMModuleRef module = LLVMGetGlobalParent(
   1100          LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
   1101       LLVMDumpModule(module);
   1102    }
   1103 
   1104 }
   1105 
   1106