Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2010 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 /**
     29  * @file
     30  * TGSI to LLVM IR translation -- AoS.
     31  *
     32  * FIXME:
     33  * - No control flow support: the existing control flow code should be factored
     34  * out of the SoA code into a common module and shared.
     35  * - No derivatives. Derivative logic should be pluggable, just like the samplers.
     36  *
     37  * @author Jose Fonseca <jfonseca (at) vmware.com>
     38  */
     39 
     40 #include "pipe/p_config.h"
     41 #include "pipe/p_shader_tokens.h"
     42 #include "util/u_debug.h"
     43 #include "util/u_math.h"
     44 #include "util/u_memory.h"
     45 #include "tgsi/tgsi_dump.h"
     46 #include "tgsi/tgsi_info.h"
     47 #include "tgsi/tgsi_parse.h"
     48 #include "tgsi/tgsi_util.h"
     49 #include "tgsi/tgsi_scan.h"
     50 #include "lp_bld_type.h"
     51 #include "lp_bld_const.h"
     52 #include "lp_bld_arit.h"
     53 #include "lp_bld_logic.h"
     54 #include "lp_bld_swizzle.h"
     55 #include "lp_bld_flow.h"
     56 #include "lp_bld_quad.h"
     57 #include "lp_bld_tgsi.h"
     58 #include "lp_bld_debug.h"
     59 #include "lp_bld_sample.h"
     60 
     61 
     62 /**
     63  * Wrapper around lp_build_swizzle_aos which translates swizzles to another
     64  * ordering.
     65  */
     66 static LLVMValueRef
     67 swizzle_aos(struct lp_build_tgsi_context *bld_base,
     68             LLVMValueRef a,
     69             unsigned swizzle_x,
     70             unsigned swizzle_y,
     71             unsigned swizzle_z,
     72             unsigned swizzle_w)
     73 {
     74    unsigned char swizzles[4];
     75    struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
     76 
     77    assert(swizzle_x < 4);
     78    assert(swizzle_y < 4);
     79    assert(swizzle_z < 4);
     80    assert(swizzle_w < 4);
     81 
     82    swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
     83    swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
     84    swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
     85    swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
     86 
     87    return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
     88 }
     89 
     90 
     91 static LLVMValueRef
     92 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
     93                    LLVMValueRef a,
     94                    unsigned chan)
     95 {
     96    chan = bld->swizzles[chan];
     97    return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4);
     98 }
     99 
    100 
    101 static LLVMValueRef
    102 emit_fetch_constant(
    103    struct lp_build_tgsi_context * bld_base,
    104    const struct tgsi_full_src_register * reg,
    105    enum tgsi_opcode_type stype,
    106    unsigned swizzle)
    107 {
    108    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
    109    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
    110    struct lp_type type = bld_base->base.type;
    111    LLVMValueRef res;
    112    unsigned chan;
    113 
    114    assert(!reg->Register.Indirect);
    115 
    116    /*
    117     * Get the constants components
    118     */
    119 
    120    res = bld->bld_base.base.undef;
    121    for (chan = 0; chan < 4; ++chan) {
    122       LLVMValueRef index;
    123       LLVMValueRef scalar_ptr;
    124       LLVMValueRef scalar;
    125       LLVMValueRef swizzle;
    126 
    127       index = lp_build_const_int32(bld->bld_base.base.gallivm,
    128                                    reg->Register.Index * 4 + chan);
    129 
    130       scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
    131 
    132       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
    133 
    134       lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
    135 
    136       /*
    137        * NOTE: constants array is always assumed to be RGBA
    138        */
    139 
    140       swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
    141                                      bld->swizzles[chan]);
    142 
    143       res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
    144    }
    145 
    146    /*
    147     * Broadcast the first quaternion to all others.
    148     *
    149     * XXX: could be factored into a reusable function.
    150     */
    151 
    152    if (type.length > 4) {
    153       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
    154       unsigned i;
    155 
    156       for (chan = 0; chan < 4; ++chan) {
    157          shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
    158       }
    159 
    160       for (i = 4; i < type.length; ++i) {
    161          shuffles[i] = shuffles[i % 4];
    162       }
    163 
    164       res = LLVMBuildShuffleVector(builder,
    165                                    res, bld->bld_base.base.undef,
    166                                    LLVMConstVector(shuffles, type.length),
    167                                    "");
    168    }
    169    return res;
    170 }
    171 
    172 static LLVMValueRef
    173 emit_fetch_immediate(
    174    struct lp_build_tgsi_context * bld_base,
    175    const struct tgsi_full_src_register * reg,
    176    enum tgsi_opcode_type stype,
    177    unsigned swizzle)
    178 {
    179    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
    180    LLVMValueRef res = bld->immediates[reg->Register.Index];
    181    assert(res);
    182    return res;
    183 }
    184 
    185 static LLVMValueRef
    186 emit_fetch_input(
    187    struct lp_build_tgsi_context * bld_base,
    188    const struct tgsi_full_src_register * reg,
    189    enum tgsi_opcode_type stype,
    190    unsigned swizzle)
    191 {
    192    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
    193    LLVMValueRef res = bld->inputs[reg->Register.Index];
    194    assert(!reg->Register.Indirect);
    195    assert(res);
    196    return res;
    197 }
    198 
    199 static LLVMValueRef
    200 emit_fetch_temporary(
    201    struct lp_build_tgsi_context * bld_base,
    202    const struct tgsi_full_src_register * reg,
    203    enum tgsi_opcode_type stype,
    204    unsigned swizzle)
    205 {
    206    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
    207    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
    208    LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
    209    LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
    210    assert(!reg->Register.Indirect);
    211    if (!res)
    212       return bld->bld_base.base.undef;
    213 
    214    return res;
    215 }
    216 
    217 /**
    218  * Register store.
    219  */
    220 void
    221 lp_emit_store_aos(
    222    struct lp_build_tgsi_aos_context *bld,
    223    const struct tgsi_full_instruction *inst,
    224    unsigned index,
    225    LLVMValueRef value)
    226 {
    227    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    228    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
    229    LLVMValueRef mask = NULL;
    230    LLVMValueRef ptr;
    231 
    232    /*
    233     * Saturate the value
    234     */
    235    if (inst->Instruction.Saturate) {
    236       value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
    237       value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
    238    }
    239 
    240    /*
    241     * Translate the register file
    242     */
    243 
    244    assert(!reg->Register.Indirect);
    245 
    246    switch (reg->Register.File) {
    247    case TGSI_FILE_OUTPUT:
    248       ptr = bld->outputs[reg->Register.Index];
    249       break;
    250 
    251    case TGSI_FILE_TEMPORARY:
    252       ptr = bld->temps[reg->Register.Index];
    253       break;
    254 
    255    case TGSI_FILE_ADDRESS:
    256       ptr = bld->addr[reg->Indirect.Index];
    257       break;
    258 
    259    case TGSI_FILE_PREDICATE:
    260       ptr = bld->preds[reg->Register.Index];
    261       break;
    262 
    263    default:
    264       assert(0);
    265       return;
    266    }
    267 
    268    if (!ptr)
    269       return;
    270    /*
    271     * Predicate
    272     */
    273 
    274    if (inst->Instruction.Predicate) {
    275       LLVMValueRef pred;
    276 
    277       assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
    278 
    279       pred = LLVMBuildLoad(builder,
    280                            bld->preds[inst->Predicate.Index], "");
    281 
    282       /*
    283        * Convert the value to an integer mask.
    284        */
    285       pred = lp_build_compare(bld->bld_base.base.gallivm,
    286                                bld->bld_base.base.type,
    287                                PIPE_FUNC_NOTEQUAL,
    288                                pred,
    289                                bld->bld_base.base.zero);
    290 
    291       if (inst->Predicate.Negate) {
    292          pred = LLVMBuildNot(builder, pred, "");
    293       }
    294 
    295       pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
    296                          inst->Predicate.SwizzleX,
    297                          inst->Predicate.SwizzleY,
    298                          inst->Predicate.SwizzleZ,
    299                          inst->Predicate.SwizzleW);
    300 
    301       if (mask) {
    302          mask = LLVMBuildAnd(builder, mask, pred, "");
    303       } else {
    304          mask = pred;
    305       }
    306    }
    307 
    308    /*
    309     * Writemask
    310     */
    311 
    312    if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
    313       LLVMValueRef writemask;
    314 
    315       writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
    316                                                    bld->bld_base.base.type,
    317                                                    reg->Register.WriteMask,
    318                                                    TGSI_NUM_CHANNELS,
    319                                                    bld->swizzles);
    320 
    321       if (mask) {
    322          mask = LLVMBuildAnd(builder, mask, writemask, "");
    323       } else {
    324          mask = writemask;
    325       }
    326    }
    327 
    328    if (mask) {
    329       LLVMValueRef orig_value;
    330 
    331       orig_value = LLVMBuildLoad(builder, ptr, "");
    332       value = lp_build_select(&bld->bld_base.base,
    333                               mask, value, orig_value);
    334    }
    335 
    336    LLVMBuildStore(builder, value, ptr);
    337 }
    338 
    339 
    340 /**
    341  * High-level instruction translators.
    342  */
    343 
    344 static LLVMValueRef
    345 emit_tex(struct lp_build_tgsi_aos_context *bld,
    346          const struct tgsi_full_instruction *inst,
    347          enum lp_build_tex_modifier modifier)
    348 {
    349    unsigned target;
    350    unsigned unit;
    351    LLVMValueRef coords;
    352    struct lp_derivatives derivs = { {NULL}, {NULL} };
    353 
    354    if (!bld->sampler) {
    355       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
    356       return bld->bld_base.base.undef;
    357    }
    358 
    359    target = inst->Texture.Texture;
    360 
    361    coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
    362 
    363    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
    364       /* probably not going to work */
    365       derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
    366       derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
    367       unit = inst->Src[3].Register.Index;
    368    }
    369    else {
    370       unit = inst->Src[1].Register.Index;
    371    }
    372    return bld->sampler->emit_fetch_texel(bld->sampler,
    373                                          &bld->bld_base.base,
    374                                          target, unit,
    375                                          coords, derivs,
    376                                          modifier);
    377 }
    378 
    379 
    380 static LLVMValueRef
    381 emit_sample(struct lp_build_tgsi_aos_context *bld,
    382             const struct tgsi_full_instruction *inst,
    383             enum lp_build_tex_modifier modifier)
    384 {
    385    unsigned target;
    386    unsigned unit;
    387    LLVMValueRef coords;
    388    struct lp_derivatives derivs = { {NULL}, {NULL} };
    389 
    390    if (!bld->sampler) {
    391       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
    392       return bld->bld_base.base.undef;
    393    }
    394 
    395    coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
    396 
    397    /* ignore modifiers, can't handle different sampler / sampler view, etc... */
    398    unit = inst->Src[1].Register.Index;
    399    assert(inst->Src[2].Register.Index == unit);
    400 
    401    target = bld->sv[unit].Resource;
    402 
    403    return bld->sampler->emit_fetch_texel(bld->sampler,
    404                                          &bld->bld_base.base,
    405                                          target, unit,
    406                                          coords, derivs,
    407                                          modifier);
    408 }
    409 
    410 
    411 void
    412 lp_emit_declaration_aos(
    413    struct lp_build_tgsi_aos_context *bld,
    414    const struct tgsi_full_declaration *decl)
    415 {
    416    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    417    LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
    418 
    419    unsigned first = decl->Range.First;
    420    unsigned last = decl->Range.Last;
    421    unsigned idx;
    422 
    423    for (idx = first; idx <= last; ++idx) {
    424       switch (decl->Declaration.File) {
    425       case TGSI_FILE_TEMPORARY:
    426          assert(idx < LP_MAX_INLINED_TEMPS);
    427          if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
    428             LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
    429             bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
    430                                                      vec_type, array_size, "");
    431          } else {
    432             bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
    433          }
    434          break;
    435 
    436       case TGSI_FILE_OUTPUT:
    437          bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
    438          break;
    439 
    440       case TGSI_FILE_ADDRESS:
    441          assert(idx < LP_MAX_TGSI_ADDRS);
    442          bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
    443          break;
    444 
    445       case TGSI_FILE_PREDICATE:
    446          assert(idx < LP_MAX_TGSI_PREDS);
    447          bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
    448          break;
    449 
    450       case TGSI_FILE_SAMPLER_VIEW:
    451          /*
    452           * The target stored here MUST match whatever there actually
    453           * is in the set sampler views (what about return type?).
    454           */
    455          assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
    456          for (idx = first; idx <= last; ++idx) {
    457             bld->sv[idx] = decl->SamplerView;
    458          }
    459          break;
    460 
    461       default:
    462          /* don't need to declare other vars */
    463          break;
    464       }
    465    }
    466 }
    467 
    468 
    469 /**
    470  * Emit LLVM for one TGSI instruction.
    471  * \param return TRUE for success, FALSE otherwise
    472  */
    473 boolean
    474 lp_emit_instruction_aos(
    475    struct lp_build_tgsi_aos_context *bld,
    476    const struct tgsi_full_instruction *inst,
    477    const struct tgsi_opcode_info *info,
    478    int *pc)
    479 {
    480    LLVMValueRef src0, src1, src2;
    481    LLVMValueRef tmp0;
    482    LLVMValueRef dst0 = NULL;
    483 
    484    /*
    485     * Stores and write masks are handled in a general fashion after the long
    486     * instruction opcode switch statement.
    487     *
    488     * Although not strictly necessary, we avoid generating instructions for
    489     * channels which won't be stored, in cases where that's easy. For some
    490     * complex instructions, like texture sampling, it is more convenient to
    491     * assume a full writemask and then let LLVM optimization passes eliminate
    492     * redundant code.
    493     */
    494 
    495    (*pc)++;
    496 
    497    assert(info->num_dst <= 1);
    498    if (info->num_dst) {
    499       dst0 = bld->bld_base.base.undef;
    500    }
    501 
    502    switch (inst->Instruction.Opcode) {
    503    case TGSI_OPCODE_ARL:
    504       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    505       dst0 = lp_build_floor(&bld->bld_base.base, src0);
    506       break;
    507 
    508    case TGSI_OPCODE_MOV:
    509       dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    510       break;
    511 
    512    case TGSI_OPCODE_LIT:
    513       return FALSE;
    514 
    515    case TGSI_OPCODE_RCP:
    516    /* TGSI_OPCODE_RECIP */
    517       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    518       dst0 = lp_build_rcp(&bld->bld_base.base, src0);
    519       break;
    520 
    521    case TGSI_OPCODE_RSQ:
    522    /* TGSI_OPCODE_RECIPSQRT */
    523       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    524       tmp0 = lp_build_abs(&bld->bld_base.base, src0);
    525       dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
    526       break;
    527 
    528    case TGSI_OPCODE_EXP:
    529       return FALSE;
    530 
    531    case TGSI_OPCODE_LOG:
    532       return FALSE;
    533 
    534    case TGSI_OPCODE_MUL:
    535       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    536       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    537       dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
    538       break;
    539 
    540    case TGSI_OPCODE_ADD:
    541       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    542       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    543       dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
    544       break;
    545 
    546    case TGSI_OPCODE_DP3:
    547    /* TGSI_OPCODE_DOT3 */
    548       return FALSE;
    549 
    550    case TGSI_OPCODE_DP4:
    551    /* TGSI_OPCODE_DOT4 */
    552       return FALSE;
    553 
    554    case TGSI_OPCODE_DST:
    555       return FALSE;
    556 
    557    case TGSI_OPCODE_MIN:
    558       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    559       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    560       dst0 = lp_build_min(&bld->bld_base.base, src0, src1);
    561       break;
    562 
    563    case TGSI_OPCODE_MAX:
    564       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    565       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    566       dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
    567       break;
    568 
    569    case TGSI_OPCODE_SLT:
    570    /* TGSI_OPCODE_SETLT */
    571       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    572       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    573       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
    574       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    575       break;
    576 
    577    case TGSI_OPCODE_SGE:
    578    /* TGSI_OPCODE_SETGE */
    579       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    580       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    581       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
    582       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    583       break;
    584 
    585    case TGSI_OPCODE_MAD:
    586    /* TGSI_OPCODE_MADD */
    587       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    588       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    589       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
    590       tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
    591       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
    592       break;
    593 
    594    case TGSI_OPCODE_LRP:
    595       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    596       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    597       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
    598       tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
    599       tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
    600       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
    601       break;
    602 
    603    case TGSI_OPCODE_DP2A:
    604       return FALSE;
    605 
    606    case TGSI_OPCODE_FRC:
    607       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    608       tmp0 = lp_build_floor(&bld->bld_base.base, src0);
    609       dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
    610       break;
    611 
    612    case TGSI_OPCODE_CLAMP:
    613       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    614       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    615       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
    616       tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
    617       dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
    618       break;
    619 
    620    case TGSI_OPCODE_FLR:
    621       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    622       dst0 = lp_build_floor(&bld->bld_base.base, src0);
    623       break;
    624 
    625    case TGSI_OPCODE_ROUND:
    626       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    627       dst0 = lp_build_round(&bld->bld_base.base, src0);
    628       break;
    629 
    630    case TGSI_OPCODE_EX2:
    631       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    632       tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
    633       dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
    634       break;
    635 
    636    case TGSI_OPCODE_LG2:
    637       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    638       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
    639       dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
    640       break;
    641 
    642    case TGSI_OPCODE_POW:
    643       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    644       src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
    645       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    646       src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
    647       dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
    648       break;
    649 
    650    case TGSI_OPCODE_XPD:
    651       return FALSE;
    652 
    653    case TGSI_OPCODE_DPH:
    654       return FALSE;
    655 
    656    case TGSI_OPCODE_COS:
    657       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    658       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
    659       dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
    660       break;
    661 
    662    case TGSI_OPCODE_DDX:
    663       return FALSE;
    664 
    665    case TGSI_OPCODE_DDY:
    666       return FALSE;
    667 
    668    case TGSI_OPCODE_KILL:
    669       return FALSE;
    670 
    671    case TGSI_OPCODE_KILL_IF:
    672       return FALSE;
    673 
    674    case TGSI_OPCODE_PK2H:
    675       return FALSE;
    676       break;
    677 
    678    case TGSI_OPCODE_PK2US:
    679       return FALSE;
    680       break;
    681 
    682    case TGSI_OPCODE_PK4B:
    683       return FALSE;
    684       break;
    685 
    686    case TGSI_OPCODE_PK4UB:
    687       return FALSE;
    688 
    689    case TGSI_OPCODE_SEQ:
    690       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    691       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    692       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
    693       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    694       break;
    695 
    696    case TGSI_OPCODE_SGT:
    697       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    698       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    699       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
    700       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    701       break;
    702 
    703    case TGSI_OPCODE_SIN:
    704       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    705       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
    706       dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
    707       break;
    708 
    709    case TGSI_OPCODE_SLE:
    710       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    711       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    712       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
    713       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    714       break;
    715 
    716    case TGSI_OPCODE_SNE:
    717       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    718       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    719       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
    720       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    721       break;
    722 
    723    case TGSI_OPCODE_TEX:
    724       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
    725       break;
    726 
    727    case TGSI_OPCODE_TXD:
    728       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
    729       break;
    730 
    731    case TGSI_OPCODE_UP2H:
    732       /* deprecated */
    733       assert (0);
    734       return FALSE;
    735       break;
    736 
    737    case TGSI_OPCODE_UP2US:
    738       /* deprecated */
    739       assert(0);
    740       return FALSE;
    741       break;
    742 
    743    case TGSI_OPCODE_UP4B:
    744       /* deprecated */
    745       assert(0);
    746       return FALSE;
    747       break;
    748 
    749    case TGSI_OPCODE_UP4UB:
    750       /* deprecated */
    751       assert(0);
    752       return FALSE;
    753       break;
    754 
    755    case TGSI_OPCODE_ARR:
    756       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    757       dst0 = lp_build_round(&bld->bld_base.base, src0);
    758       break;
    759 
    760    case TGSI_OPCODE_CAL:
    761       return FALSE;
    762 
    763    case TGSI_OPCODE_RET:
    764       /* safe to ignore at end */
    765       break;
    766 
    767    case TGSI_OPCODE_END:
    768       *pc = -1;
    769       break;
    770 
    771    case TGSI_OPCODE_SSG:
    772    /* TGSI_OPCODE_SGN */
    773       tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    774       dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
    775       break;
    776 
    777    case TGSI_OPCODE_CMP:
    778       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    779       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    780       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
    781       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
    782       dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
    783       break;
    784 
    785    case TGSI_OPCODE_SCS:
    786       return FALSE;
    787 
    788    case TGSI_OPCODE_TXB:
    789       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
    790       break;
    791 
    792    case TGSI_OPCODE_DIV:
    793       assert(0);
    794       return FALSE;
    795       break;
    796 
    797    case TGSI_OPCODE_DP2:
    798       return FALSE;
    799 
    800    case TGSI_OPCODE_TXL:
    801       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
    802       break;
    803 
    804    case TGSI_OPCODE_TXP:
    805       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
    806       break;
    807 
    808    case TGSI_OPCODE_BRK:
    809       return FALSE;
    810 
    811    case TGSI_OPCODE_IF:
    812    case TGSI_OPCODE_UIF:
    813       return FALSE;
    814 
    815    case TGSI_OPCODE_BGNLOOP:
    816       return FALSE;
    817 
    818    case TGSI_OPCODE_BGNSUB:
    819       return FALSE;
    820 
    821    case TGSI_OPCODE_ELSE:
    822       return FALSE;
    823 
    824    case TGSI_OPCODE_ENDIF:
    825       return FALSE;
    826 
    827    case TGSI_OPCODE_ENDLOOP:
    828       return FALSE;
    829 
    830    case TGSI_OPCODE_ENDSUB:
    831       return FALSE;
    832 
    833    case TGSI_OPCODE_PUSHA:
    834       /* deprecated? */
    835       assert(0);
    836       return FALSE;
    837       break;
    838 
    839    case TGSI_OPCODE_POPA:
    840       /* deprecated? */
    841       assert(0);
    842       return FALSE;
    843       break;
    844 
    845    case TGSI_OPCODE_CEIL:
    846       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    847       dst0 = lp_build_ceil(&bld->bld_base.base, src0);
    848       break;
    849 
    850    case TGSI_OPCODE_I2F:
    851       assert(0);
    852       return FALSE;
    853       break;
    854 
    855    case TGSI_OPCODE_NOT:
    856       assert(0);
    857       return FALSE;
    858       break;
    859 
    860    case TGSI_OPCODE_TRUNC:
    861       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    862       dst0 = lp_build_trunc(&bld->bld_base.base, src0);
    863       break;
    864 
    865    case TGSI_OPCODE_SHL:
    866       assert(0);
    867       return FALSE;
    868       break;
    869 
    870    case TGSI_OPCODE_ISHR:
    871       assert(0);
    872       return FALSE;
    873       break;
    874 
    875    case TGSI_OPCODE_AND:
    876       assert(0);
    877       return FALSE;
    878       break;
    879 
    880    case TGSI_OPCODE_OR:
    881       assert(0);
    882       return FALSE;
    883       break;
    884 
    885    case TGSI_OPCODE_MOD:
    886       assert(0);
    887       return FALSE;
    888       break;
    889 
    890    case TGSI_OPCODE_XOR:
    891       assert(0);
    892       return FALSE;
    893       break;
    894 
    895    case TGSI_OPCODE_SAD:
    896       assert(0);
    897       return FALSE;
    898       break;
    899 
    900    case TGSI_OPCODE_TXF:
    901       assert(0);
    902       return FALSE;
    903       break;
    904 
    905    case TGSI_OPCODE_TXQ:
    906       assert(0);
    907       return FALSE;
    908       break;
    909 
    910    case TGSI_OPCODE_CONT:
    911       return FALSE;
    912 
    913    case TGSI_OPCODE_EMIT:
    914       return FALSE;
    915       break;
    916 
    917    case TGSI_OPCODE_ENDPRIM:
    918       return FALSE;
    919       break;
    920 
    921    case TGSI_OPCODE_NOP:
    922       break;
    923 
    924    case TGSI_OPCODE_SAMPLE:
    925       dst0 = emit_sample(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
    926       break;
    927 
    928    default:
    929       return FALSE;
    930    }
    931 
    932    if (info->num_dst) {
    933       lp_emit_store_aos(bld, inst, 0, dst0);
    934    }
    935 
    936    return TRUE;
    937 }
    938 
    939 
    940 void
    941 lp_build_tgsi_aos(struct gallivm_state *gallivm,
    942                   const struct tgsi_token *tokens,
    943                   struct lp_type type,
    944                   const unsigned char swizzles[4],
    945                   LLVMValueRef consts_ptr,
    946                   const LLVMValueRef *inputs,
    947                   LLVMValueRef *outputs,
    948                   struct lp_build_sampler_aos *sampler,
    949                   const struct tgsi_shader_info *info)
    950 {
    951    struct lp_build_tgsi_aos_context bld;
    952    struct tgsi_parse_context parse;
    953    uint num_immediates = 0;
    954    unsigned chan;
    955    int pc = 0;
    956 
    957    /* Setup build context */
    958    memset(&bld, 0, sizeof bld);
    959    lp_build_context_init(&bld.bld_base.base, gallivm, type);
    960    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
    961    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
    962    lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
    963 
    964    for (chan = 0; chan < 4; ++chan) {
    965       bld.swizzles[chan] = swizzles[chan];
    966       bld.inv_swizzles[swizzles[chan]] = chan;
    967    }
    968 
    969    bld.inputs = inputs;
    970    bld.outputs = outputs;
    971    bld.consts_ptr = consts_ptr;
    972    bld.sampler = sampler;
    973    bld.indirect_files = info->indirect_files;
    974    bld.bld_base.emit_swizzle = swizzle_aos;
    975    bld.bld_base.info = info;
    976 
    977    bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
    978    bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
    979    bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
    980    bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
    981 
    982    /* Set opcode actions */
    983    lp_set_default_actions_cpu(&bld.bld_base);
    984 
    985    if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
    986       return;
    987    }
    988 
    989    tgsi_parse_init(&parse, tokens);
    990 
    991    while (!tgsi_parse_end_of_tokens(&parse)) {
    992       tgsi_parse_token(&parse);
    993 
    994       switch(parse.FullToken.Token.Type) {
    995       case TGSI_TOKEN_TYPE_DECLARATION:
    996          /* Inputs already interpolated */
    997          lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
    998          break;
    999 
   1000       case TGSI_TOKEN_TYPE_INSTRUCTION:
   1001          /* save expanded instruction */
   1002          lp_bld_tgsi_add_instruction(&bld.bld_base,
   1003                                      &parse.FullToken.FullInstruction);
   1004          break;
   1005 
   1006       case TGSI_TOKEN_TYPE_IMMEDIATE:
   1007          /* simply copy the immediate values into the next immediates[] slot */
   1008          {
   1009             const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
   1010             float imm[4];
   1011             assert(size <= 4);
   1012             assert(num_immediates < LP_MAX_INLINED_IMMEDIATES);
   1013             for (chan = 0; chan < 4; ++chan) {
   1014                imm[chan] = 0.0f;
   1015             }
   1016             for (chan = 0; chan < size; ++chan) {
   1017                unsigned swizzle = bld.swizzles[chan];
   1018                imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
   1019             }
   1020             bld.immediates[num_immediates] =
   1021                      lp_build_const_aos(gallivm, type,
   1022                                         imm[0], imm[1], imm[2], imm[3],
   1023                                         NULL);
   1024             num_immediates++;
   1025          }
   1026          break;
   1027 
   1028       case TGSI_TOKEN_TYPE_PROPERTY:
   1029          break;
   1030 
   1031       default:
   1032          assert(0);
   1033       }
   1034    }
   1035 
   1036    while (pc != -1) {
   1037       struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
   1038       const struct tgsi_opcode_info *opcode_info =
   1039          tgsi_get_opcode_info(instr->Instruction.Opcode);
   1040       if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
   1041          _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
   1042                        opcode_info->mnemonic);
   1043    }
   1044 
   1045    if (0) {
   1046       LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
   1047       LLVMValueRef function = LLVMGetBasicBlockParent(block);
   1048       debug_printf("11111111111111111111111111111 \n");
   1049       tgsi_dump(tokens, 0);
   1050       lp_debug_dump_value(function);
   1051       debug_printf("2222222222222222222222222222 \n");
   1052    }
   1053    tgsi_parse_free(&parse);
   1054    FREE(bld.bld_base.instructions);
   1055 
   1056    if (0) {
   1057       LLVMModuleRef module = LLVMGetGlobalParent(
   1058          LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
   1059       LLVMDumpModule(module);
   1060    }
   1061 
   1062 }
   1063 
   1064