Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2010 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 /**
     29  * @file
     30  * TGSI to LLVM IR translation -- AoS.
     31  *
     32  * FIXME:
     33  * - No control flow support: the existing control flow code should be factored
     34  * out of the SoA code into a common module and shared.
     35  * - No derivatives. Derivative logic should be pluggable, just like the samplers.
     36  *
     37  * @author Jose Fonseca <jfonseca (at) vmware.com>
     38  */
     39 
     40 #include "pipe/p_config.h"
     41 #include "pipe/p_shader_tokens.h"
     42 #include "util/u_debug.h"
     43 #include "util/u_math.h"
     44 #include "util/u_memory.h"
     45 #include "tgsi/tgsi_dump.h"
     46 #include "tgsi/tgsi_info.h"
     47 #include "tgsi/tgsi_parse.h"
     48 #include "tgsi/tgsi_util.h"
     49 #include "tgsi/tgsi_scan.h"
     50 #include "lp_bld_type.h"
     51 #include "lp_bld_const.h"
     52 #include "lp_bld_arit.h"
     53 #include "lp_bld_logic.h"
     54 #include "lp_bld_swizzle.h"
     55 #include "lp_bld_flow.h"
     56 #include "lp_bld_quad.h"
     57 #include "lp_bld_tgsi.h"
     58 #include "lp_bld_debug.h"
     59 #include "lp_bld_sample.h"
     60 
     61 
     62 /**
     63  * Wrapper around lp_build_swizzle_aos which translates swizzles to another
     64  * ordering.
     65  */
     66 static LLVMValueRef
     67 swizzle_aos(struct lp_build_tgsi_context *bld_base,
     68             LLVMValueRef a,
     69             unsigned swizzle_x,
     70             unsigned swizzle_y,
     71             unsigned swizzle_z,
     72             unsigned swizzle_w)
     73 {
     74    unsigned char swizzles[4];
     75    struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
     76 
     77    assert(swizzle_x < 4);
     78    assert(swizzle_y < 4);
     79    assert(swizzle_z < 4);
     80    assert(swizzle_w < 4);
     81 
     82    swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
     83    swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
     84    swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
     85    swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
     86 
     87    return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
     88 }
     89 
     90 
     91 static LLVMValueRef
     92 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
     93                    LLVMValueRef a,
     94                    unsigned chan)
     95 {
     96    chan = bld->swizzles[chan];
     97    return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4);
     98 }
     99 
    100 
    101 static LLVMValueRef
    102 emit_fetch_constant(
    103    struct lp_build_tgsi_context * bld_base,
    104    const struct tgsi_full_src_register * reg,
    105    enum tgsi_opcode_type stype,
    106    unsigned swizzle)
    107 {
    108    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
    109    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
    110    struct lp_type type = bld_base->base.type;
    111    LLVMValueRef res;
    112    unsigned chan;
    113 
    114    assert(!reg->Register.Indirect);
    115 
    116    /*
    117     * Get the constants components
    118     */
    119 
    120    res = bld->bld_base.base.undef;
    121    for (chan = 0; chan < 4; ++chan) {
    122       LLVMValueRef index;
    123       LLVMValueRef scalar_ptr;
    124       LLVMValueRef scalar;
    125       LLVMValueRef swizzle;
    126 
    127       index = lp_build_const_int32(bld->bld_base.base.gallivm,
    128                                    reg->Register.Index * 4 + chan);
    129 
    130       scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
    131 
    132       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
    133 
    134       lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
    135 
    136       /*
    137        * NOTE: constants array is always assumed to be RGBA
    138        */
    139 
    140       swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
    141                                      bld->swizzles[chan]);
    142 
    143       res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
    144    }
    145 
    146    /*
    147     * Broadcast the first quaternion to all others.
    148     *
    149     * XXX: could be factored into a reusable function.
    150     */
    151 
    152    if (type.length > 4) {
    153       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
    154       unsigned i;
    155 
    156       for (chan = 0; chan < 4; ++chan) {
    157          shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
    158       }
    159 
    160       for (i = 4; i < type.length; ++i) {
    161          shuffles[i] = shuffles[i % 4];
    162       }
    163 
    164       res = LLVMBuildShuffleVector(builder,
    165                                    res, bld->bld_base.base.undef,
    166                                    LLVMConstVector(shuffles, type.length),
    167                                    "");
    168    }
    169    return res;
    170 }
    171 
    172 static LLVMValueRef
    173 emit_fetch_immediate(
    174    struct lp_build_tgsi_context * bld_base,
    175    const struct tgsi_full_src_register * reg,
    176    enum tgsi_opcode_type stype,
    177    unsigned swizzle)
    178 {
    179    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
    180    LLVMValueRef res = bld->immediates[reg->Register.Index];
    181    assert(res);
    182    return res;
    183 }
    184 
    185 static LLVMValueRef
    186 emit_fetch_input(
    187    struct lp_build_tgsi_context * bld_base,
    188    const struct tgsi_full_src_register * reg,
    189    enum tgsi_opcode_type stype,
    190    unsigned swizzle)
    191 {
    192    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
    193    LLVMValueRef res = bld->inputs[reg->Register.Index];
    194    assert(!reg->Register.Indirect);
    195    assert(res);
    196    return res;
    197 }
    198 
    199 static LLVMValueRef
    200 emit_fetch_temporary(
    201    struct lp_build_tgsi_context * bld_base,
    202    const struct tgsi_full_src_register * reg,
    203    enum tgsi_opcode_type stype,
    204    unsigned swizzle)
    205 {
    206    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
    207    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
    208    LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
    209    LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
    210    assert(!reg->Register.Indirect);
    211    if (!res)
    212       return bld->bld_base.base.undef;
    213 
    214    return res;
    215 }
    216 
    217 /**
    218  * Register store.
    219  */
    220 void
    221 lp_emit_store_aos(
    222    struct lp_build_tgsi_aos_context *bld,
    223    const struct tgsi_full_instruction *inst,
    224    unsigned index,
    225    LLVMValueRef value)
    226 {
    227    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    228    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
    229    LLVMValueRef mask = NULL;
    230    LLVMValueRef ptr;
    231 
    232    /*
    233     * Saturate the value
    234     */
    235    if (inst->Instruction.Saturate) {
    236       value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
    237       value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
    238    }
    239 
    240    /*
    241     * Translate the register file
    242     */
    243 
    244    assert(!reg->Register.Indirect);
    245 
    246    switch (reg->Register.File) {
    247    case TGSI_FILE_OUTPUT:
    248       ptr = bld->outputs[reg->Register.Index];
    249       break;
    250 
    251    case TGSI_FILE_TEMPORARY:
    252       ptr = bld->temps[reg->Register.Index];
    253       break;
    254 
    255    case TGSI_FILE_ADDRESS:
    256       ptr = bld->addr[reg->Indirect.Index];
    257       break;
    258 
    259    default:
    260       assert(0);
    261       return;
    262    }
    263 
    264    if (!ptr)
    265       return;
    266 
    267    /*
    268     * Writemask
    269     */
    270 
    271    if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
    272       LLVMValueRef writemask;
    273 
    274       writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
    275                                                    bld->bld_base.base.type,
    276                                                    reg->Register.WriteMask,
    277                                                    TGSI_NUM_CHANNELS,
    278                                                    bld->swizzles);
    279 
    280       if (mask) {
    281          mask = LLVMBuildAnd(builder, mask, writemask, "");
    282       } else {
    283          mask = writemask;
    284       }
    285    }
    286 
    287    if (mask) {
    288       LLVMValueRef orig_value;
    289 
    290       orig_value = LLVMBuildLoad(builder, ptr, "");
    291       value = lp_build_select(&bld->bld_base.base,
    292                               mask, value, orig_value);
    293    }
    294 
    295    LLVMBuildStore(builder, value, ptr);
    296 }
    297 
    298 
    299 /**
    300  * High-level instruction translators.
    301  */
    302 
    303 static LLVMValueRef
    304 emit_tex(struct lp_build_tgsi_aos_context *bld,
    305          const struct tgsi_full_instruction *inst,
    306          enum lp_build_tex_modifier modifier)
    307 {
    308    unsigned target;
    309    unsigned unit;
    310    LLVMValueRef coords;
    311    struct lp_derivatives derivs = { {NULL}, {NULL} };
    312 
    313    if (!bld->sampler) {
    314       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
    315       return bld->bld_base.base.undef;
    316    }
    317 
    318    target = inst->Texture.Texture;
    319 
    320    coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
    321 
    322    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
    323       /* probably not going to work */
    324       derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
    325       derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
    326       unit = inst->Src[3].Register.Index;
    327    }
    328    else {
    329       unit = inst->Src[1].Register.Index;
    330    }
    331    return bld->sampler->emit_fetch_texel(bld->sampler,
    332                                          &bld->bld_base.base,
    333                                          target, unit,
    334                                          coords, derivs,
    335                                          modifier);
    336 }
    337 
    338 
    339 static LLVMValueRef
    340 emit_sample(struct lp_build_tgsi_aos_context *bld,
    341             const struct tgsi_full_instruction *inst,
    342             enum lp_build_tex_modifier modifier)
    343 {
    344    unsigned target;
    345    unsigned unit;
    346    LLVMValueRef coords;
    347    struct lp_derivatives derivs = { {NULL}, {NULL} };
    348 
    349    if (!bld->sampler) {
    350       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
    351       return bld->bld_base.base.undef;
    352    }
    353 
    354    coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
    355 
    356    /* ignore modifiers, can't handle different sampler / sampler view, etc... */
    357    unit = inst->Src[1].Register.Index;
    358    assert(inst->Src[2].Register.Index == unit);
    359 
    360    target = bld->sv[unit].Resource;
    361 
    362    return bld->sampler->emit_fetch_texel(bld->sampler,
    363                                          &bld->bld_base.base,
    364                                          target, unit,
    365                                          coords, derivs,
    366                                          modifier);
    367 }
    368 
    369 
    370 void
    371 lp_emit_declaration_aos(
    372    struct lp_build_tgsi_aos_context *bld,
    373    const struct tgsi_full_declaration *decl)
    374 {
    375    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    376    LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
    377 
    378    unsigned first = decl->Range.First;
    379    unsigned last = decl->Range.Last;
    380    unsigned idx;
    381 
    382    for (idx = first; idx <= last; ++idx) {
    383       switch (decl->Declaration.File) {
    384       case TGSI_FILE_TEMPORARY:
    385          assert(idx < LP_MAX_INLINED_TEMPS);
    386          if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
    387             LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
    388             bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
    389                                                      vec_type, array_size, "");
    390          } else {
    391             bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
    392          }
    393          break;
    394 
    395       case TGSI_FILE_OUTPUT:
    396          bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
    397          break;
    398 
    399       case TGSI_FILE_ADDRESS:
    400          assert(idx < LP_MAX_TGSI_ADDRS);
    401          bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
    402          break;
    403 
    404       case TGSI_FILE_SAMPLER_VIEW:
    405          /*
    406           * The target stored here MUST match whatever there actually
    407           * is in the set sampler views (what about return type?).
    408           */
    409          assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
    410          for (idx = first; idx <= last; ++idx) {
    411             bld->sv[idx] = decl->SamplerView;
    412          }
    413          break;
    414 
    415       default:
    416          /* don't need to declare other vars */
    417          break;
    418       }
    419    }
    420 }
    421 
    422 
    423 /**
    424  * Emit LLVM for one TGSI instruction.
    425  * \param return TRUE for success, FALSE otherwise
    426  */
    427 boolean
    428 lp_emit_instruction_aos(
    429    struct lp_build_tgsi_aos_context *bld,
    430    const struct tgsi_full_instruction *inst,
    431    const struct tgsi_opcode_info *info,
    432    int *pc)
    433 {
    434    LLVMValueRef src0, src1, src2;
    435    LLVMValueRef tmp0;
    436    LLVMValueRef dst0 = NULL;
    437 
    438    /*
    439     * Stores and write masks are handled in a general fashion after the long
    440     * instruction opcode switch statement.
    441     *
    442     * Although not stricitly necessary, we avoid generating instructions for
    443     * channels which won't be stored, in cases where's that easy. For some
    444     * complex instructions, like texture sampling, it is more convenient to
    445     * assume a full writemask and then let LLVM optimization passes eliminate
    446     * redundant code.
    447     */
    448 
    449    (*pc)++;
    450 
    451    assert(info->num_dst <= 1);
    452    if (info->num_dst) {
    453       dst0 = bld->bld_base.base.undef;
    454    }
    455 
    456    switch (inst->Instruction.Opcode) {
    457    case TGSI_OPCODE_ARL:
    458       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    459       dst0 = lp_build_floor(&bld->bld_base.base, src0);
    460       break;
    461 
    462    case TGSI_OPCODE_MOV:
    463       dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    464       break;
    465 
    466    case TGSI_OPCODE_LIT:
    467       return FALSE;
    468 
    469    case TGSI_OPCODE_RCP:
    470    /* TGSI_OPCODE_RECIP */
    471       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    472       dst0 = lp_build_rcp(&bld->bld_base.base, src0);
    473       break;
    474 
    475    case TGSI_OPCODE_RSQ:
    476    /* TGSI_OPCODE_RECIPSQRT */
    477       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    478       tmp0 = lp_build_abs(&bld->bld_base.base, src0);
    479       dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
    480       break;
    481 
    482    case TGSI_OPCODE_EXP:
    483       return FALSE;
    484 
    485    case TGSI_OPCODE_LOG:
    486       return FALSE;
    487 
    488    case TGSI_OPCODE_MUL:
    489       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    490       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    491       dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
    492       break;
    493 
    494    case TGSI_OPCODE_ADD:
    495       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    496       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    497       dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
    498       break;
    499 
    500    case TGSI_OPCODE_DP3:
    501    /* TGSI_OPCODE_DOT3 */
    502       return FALSE;
    503 
    504    case TGSI_OPCODE_DP4:
    505    /* TGSI_OPCODE_DOT4 */
    506       return FALSE;
    507 
    508    case TGSI_OPCODE_DST:
    509       return FALSE;
    510 
    511    case TGSI_OPCODE_MIN:
    512       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    513       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    514       dst0 = lp_build_min(&bld->bld_base.base, src0, src1);
    515       break;
    516 
    517    case TGSI_OPCODE_MAX:
    518       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    519       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    520       dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
    521       break;
    522 
    523    case TGSI_OPCODE_SLT:
    524    /* TGSI_OPCODE_SETLT */
    525       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    526       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    527       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
    528       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    529       break;
    530 
    531    case TGSI_OPCODE_SGE:
    532    /* TGSI_OPCODE_SETGE */
    533       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    534       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    535       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
    536       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    537       break;
    538 
    539    case TGSI_OPCODE_MAD:
    540    /* TGSI_OPCODE_MADD */
    541       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    542       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    543       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
    544       tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
    545       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
    546       break;
    547 
    548    case TGSI_OPCODE_LRP:
    549       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    550       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    551       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
    552       tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
    553       tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
    554       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
    555       break;
    556 
    557    case TGSI_OPCODE_FRC:
    558       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    559       tmp0 = lp_build_floor(&bld->bld_base.base, src0);
    560       dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
    561       break;
    562 
    563    case TGSI_OPCODE_FLR:
    564       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    565       dst0 = lp_build_floor(&bld->bld_base.base, src0);
    566       break;
    567 
    568    case TGSI_OPCODE_ROUND:
    569       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    570       dst0 = lp_build_round(&bld->bld_base.base, src0);
    571       break;
    572 
    573    case TGSI_OPCODE_EX2:
    574       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    575       tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
    576       dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
    577       break;
    578 
    579    case TGSI_OPCODE_LG2:
    580       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    581       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
    582       dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
    583       break;
    584 
    585    case TGSI_OPCODE_POW:
    586       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    587       src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
    588       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    589       src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
    590       dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
    591       break;
    592 
    593    case TGSI_OPCODE_COS:
    594       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    595       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
    596       dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
    597       break;
    598 
    599    case TGSI_OPCODE_DDX:
    600       return FALSE;
    601 
    602    case TGSI_OPCODE_DDY:
    603       return FALSE;
    604 
    605    case TGSI_OPCODE_KILL:
    606       return FALSE;
    607 
    608    case TGSI_OPCODE_KILL_IF:
    609       return FALSE;
    610 
    611    case TGSI_OPCODE_PK2H:
    612       return FALSE;
    613       break;
    614 
    615    case TGSI_OPCODE_PK2US:
    616       return FALSE;
    617       break;
    618 
    619    case TGSI_OPCODE_PK4B:
    620       return FALSE;
    621       break;
    622 
    623    case TGSI_OPCODE_PK4UB:
    624       return FALSE;
    625 
    626    case TGSI_OPCODE_SEQ:
    627       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    628       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    629       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
    630       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    631       break;
    632 
    633    case TGSI_OPCODE_SGT:
    634       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    635       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    636       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
    637       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    638       break;
    639 
    640    case TGSI_OPCODE_SIN:
    641       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    642       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
    643       dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
    644       break;
    645 
    646    case TGSI_OPCODE_SLE:
    647       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    648       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    649       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
    650       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    651       break;
    652 
    653    case TGSI_OPCODE_SNE:
    654       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    655       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    656       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
    657       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
    658       break;
    659 
    660    case TGSI_OPCODE_TEX:
    661       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
    662       break;
    663 
    664    case TGSI_OPCODE_TXD:
    665       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
    666       break;
    667 
    668    case TGSI_OPCODE_UP2H:
    669       /* deprecated */
    670       assert (0);
    671       return FALSE;
    672       break;
    673 
    674    case TGSI_OPCODE_UP2US:
    675       /* deprecated */
    676       assert(0);
    677       return FALSE;
    678       break;
    679 
    680    case TGSI_OPCODE_UP4B:
    681       /* deprecated */
    682       assert(0);
    683       return FALSE;
    684       break;
    685 
    686    case TGSI_OPCODE_UP4UB:
    687       /* deprecated */
    688       assert(0);
    689       return FALSE;
    690       break;
    691 
    692    case TGSI_OPCODE_ARR:
    693       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    694       dst0 = lp_build_round(&bld->bld_base.base, src0);
    695       break;
    696 
    697    case TGSI_OPCODE_CAL:
    698       return FALSE;
    699 
    700    case TGSI_OPCODE_RET:
    701       /* safe to ignore at end */
    702       break;
    703 
    704    case TGSI_OPCODE_END:
    705       *pc = -1;
    706       break;
    707 
    708    case TGSI_OPCODE_SSG:
    709    /* TGSI_OPCODE_SGN */
    710       tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    711       dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
    712       break;
    713 
    714    case TGSI_OPCODE_CMP:
    715       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    716       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
    717       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
    718       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
    719       dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
    720       break;
    721 
    722    case TGSI_OPCODE_TXB:
    723       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
    724       break;
    725 
    726    case TGSI_OPCODE_DIV:
    727       assert(0);
    728       return FALSE;
    729       break;
    730 
    731    case TGSI_OPCODE_DP2:
    732       return FALSE;
    733 
    734    case TGSI_OPCODE_TXL:
    735       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
    736       break;
    737 
    738    case TGSI_OPCODE_TXP:
    739       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
    740       break;
    741 
    742    case TGSI_OPCODE_BRK:
    743       return FALSE;
    744 
    745    case TGSI_OPCODE_IF:
    746    case TGSI_OPCODE_UIF:
    747       return FALSE;
    748 
    749    case TGSI_OPCODE_BGNLOOP:
    750       return FALSE;
    751 
    752    case TGSI_OPCODE_BGNSUB:
    753       return FALSE;
    754 
    755    case TGSI_OPCODE_ELSE:
    756       return FALSE;
    757 
    758    case TGSI_OPCODE_ENDIF:
    759       return FALSE;
    760 
    761    case TGSI_OPCODE_ENDLOOP:
    762       return FALSE;
    763 
    764    case TGSI_OPCODE_ENDSUB:
    765       return FALSE;
    766 
    767    case TGSI_OPCODE_CEIL:
    768       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    769       dst0 = lp_build_ceil(&bld->bld_base.base, src0);
    770       break;
    771 
    772    case TGSI_OPCODE_I2F:
    773       assert(0);
    774       return FALSE;
    775       break;
    776 
    777    case TGSI_OPCODE_NOT:
    778       assert(0);
    779       return FALSE;
    780       break;
    781 
    782    case TGSI_OPCODE_TRUNC:
    783       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
    784       dst0 = lp_build_trunc(&bld->bld_base.base, src0);
    785       break;
    786 
    787    case TGSI_OPCODE_SHL:
    788       assert(0);
    789       return FALSE;
    790       break;
    791 
    792    case TGSI_OPCODE_ISHR:
    793       assert(0);
    794       return FALSE;
    795       break;
    796 
    797    case TGSI_OPCODE_AND:
    798       assert(0);
    799       return FALSE;
    800       break;
    801 
    802    case TGSI_OPCODE_OR:
    803       assert(0);
    804       return FALSE;
    805       break;
    806 
    807    case TGSI_OPCODE_MOD:
    808       assert(0);
    809       return FALSE;
    810       break;
    811 
    812    case TGSI_OPCODE_XOR:
    813       assert(0);
    814       return FALSE;
    815       break;
    816 
    817    case TGSI_OPCODE_TXF:
    818       assert(0);
    819       return FALSE;
    820       break;
    821 
    822    case TGSI_OPCODE_TXQ:
    823       assert(0);
    824       return FALSE;
    825       break;
    826 
    827    case TGSI_OPCODE_CONT:
    828       return FALSE;
    829 
    830    case TGSI_OPCODE_EMIT:
    831       return FALSE;
    832       break;
    833 
    834    case TGSI_OPCODE_ENDPRIM:
    835       return FALSE;
    836       break;
    837 
    838    case TGSI_OPCODE_NOP:
    839       break;
    840 
    841    case TGSI_OPCODE_SAMPLE:
    842       dst0 = emit_sample(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
    843       break;
    844 
    845    default:
    846       return FALSE;
    847    }
    848 
    849    if (info->num_dst) {
    850       lp_emit_store_aos(bld, inst, 0, dst0);
    851    }
    852 
    853    return TRUE;
    854 }
    855 
    856 
    857 void
    858 lp_build_tgsi_aos(struct gallivm_state *gallivm,
    859                   const struct tgsi_token *tokens,
    860                   struct lp_type type,
    861                   const unsigned char swizzles[4],
    862                   LLVMValueRef consts_ptr,
    863                   const LLVMValueRef *inputs,
    864                   LLVMValueRef *outputs,
    865                   struct lp_build_sampler_aos *sampler,
    866                   const struct tgsi_shader_info *info)
    867 {
    868    struct lp_build_tgsi_aos_context bld;
    869    struct tgsi_parse_context parse;
    870    uint num_immediates = 0;
    871    unsigned chan;
    872    int pc = 0;
    873 
    874    /* Setup build context */
    875    memset(&bld, 0, sizeof bld);
    876    lp_build_context_init(&bld.bld_base.base, gallivm, type);
    877    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
    878    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
    879    lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
    880 
    881    for (chan = 0; chan < 4; ++chan) {
    882       bld.swizzles[chan] = swizzles[chan];
    883       bld.inv_swizzles[swizzles[chan]] = chan;
    884    }
    885 
    886    bld.inputs = inputs;
    887    bld.outputs = outputs;
    888    bld.consts_ptr = consts_ptr;
    889    bld.sampler = sampler;
    890    bld.indirect_files = info->indirect_files;
    891    bld.bld_base.emit_swizzle = swizzle_aos;
    892    bld.bld_base.info = info;
    893 
    894    bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
    895    bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
    896    bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
    897    bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
    898 
    899    /* Set opcode actions */
    900    lp_set_default_actions_cpu(&bld.bld_base);
    901 
    902    if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
    903       return;
    904    }
    905 
    906    tgsi_parse_init(&parse, tokens);
    907 
    908    while (!tgsi_parse_end_of_tokens(&parse)) {
    909       tgsi_parse_token(&parse);
    910 
    911       switch(parse.FullToken.Token.Type) {
    912       case TGSI_TOKEN_TYPE_DECLARATION:
    913          /* Inputs already interpolated */
    914          lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
    915          break;
    916 
    917       case TGSI_TOKEN_TYPE_INSTRUCTION:
    918          /* save expanded instruction */
    919          lp_bld_tgsi_add_instruction(&bld.bld_base,
    920                                      &parse.FullToken.FullInstruction);
    921          break;
    922 
    923       case TGSI_TOKEN_TYPE_IMMEDIATE:
    924          /* simply copy the immediate values into the next immediates[] slot */
    925          {
    926             const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
    927             float imm[4];
    928             assert(size <= 4);
    929             assert(num_immediates < LP_MAX_INLINED_IMMEDIATES);
    930             for (chan = 0; chan < 4; ++chan) {
    931                imm[chan] = 0.0f;
    932             }
    933             for (chan = 0; chan < size; ++chan) {
    934                unsigned swizzle = bld.swizzles[chan];
    935                imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
    936             }
    937             bld.immediates[num_immediates] =
    938                      lp_build_const_aos(gallivm, type,
    939                                         imm[0], imm[1], imm[2], imm[3],
    940                                         NULL);
    941             num_immediates++;
    942          }
    943          break;
    944 
    945       case TGSI_TOKEN_TYPE_PROPERTY:
    946          break;
    947 
    948       default:
    949          assert(0);
    950       }
    951    }
    952 
    953    while (pc != -1) {
    954       struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
    955       const struct tgsi_opcode_info *opcode_info =
    956          tgsi_get_opcode_info(instr->Instruction.Opcode);
    957       if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
    958          _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
    959                        tgsi_get_opcode_name(instr->Instruction.Opcode));
    960    }
    961 
    962    if (0) {
    963       LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
    964       LLVMValueRef function = LLVMGetBasicBlockParent(block);
    965       debug_printf("11111111111111111111111111111 \n");
    966       tgsi_dump(tokens, 0);
    967       lp_debug_dump_value(function);
    968       debug_printf("2222222222222222222222222222 \n");
    969    }
    970    tgsi_parse_free(&parse);
    971    FREE(bld.bld_base.instructions);
    972 
    973    if (0) {
    974       LLVMModuleRef module = LLVMGetGlobalParent(
    975          LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
    976       LLVMDumpModule(module);
    977    }
    978 
    979 }
    980 
    981