Home | History | Annotate | Download | only in shader
      1 /*
      2  * Mesa 3-D graphics library
      3  *
      4  * Copyright (C) 2012-2013 LunarG, Inc.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included
     14  * in all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     22  * DEALINGS IN THE SOFTWARE.
     23  *
     24  * Authors:
     25  *    Chia-I Wu <olv (at) lunarg.com>
     26  */
     27 
     28 #include "tgsi/tgsi_parse.h"
     29 #include "tgsi/tgsi_info.h"
     30 #include "tgsi/tgsi_strings.h"
     31 #include "util/u_hash_table.h"
     32 #include "toy_helpers.h"
     33 #include "toy_tgsi.h"
     34 
     35 /* map TGSI opcode to GEN opcode 1-to-1 */
     36 static const struct {
     37    int opcode;
     38    int num_dst;
     39    int num_src;
     40 } aos_simple_opcode_map[TGSI_OPCODE_LAST] = {
     41    [TGSI_OPCODE_ARL]          = { GEN6_OPCODE_RNDD,                1, 1 },
     42    [TGSI_OPCODE_MOV]          = { GEN6_OPCODE_MOV,                 1, 1 },
     43    [TGSI_OPCODE_RCP]          = { TOY_OPCODE_INV,                 1, 1 },
     44    [TGSI_OPCODE_RSQ]          = { TOY_OPCODE_RSQ,                 1, 1 },
     45    [TGSI_OPCODE_MUL]          = { GEN6_OPCODE_MUL,                 1, 2 },
     46    [TGSI_OPCODE_ADD]          = { GEN6_OPCODE_ADD,                 1, 2 },
     47    [TGSI_OPCODE_DP3]          = { GEN6_OPCODE_DP3,                 1, 2 },
     48    [TGSI_OPCODE_DP4]          = { GEN6_OPCODE_DP4,                 1, 2 },
     49    [TGSI_OPCODE_MIN]          = { GEN6_OPCODE_SEL,                 1, 2 },
     50    [TGSI_OPCODE_MAX]          = { GEN6_OPCODE_SEL,                 1, 2 },
     51    /* a later pass will move src[2] to accumulator */
     52    [TGSI_OPCODE_MAD]          = { GEN6_OPCODE_MAC,                 1, 3 },
     53    [TGSI_OPCODE_SQRT]         = { TOY_OPCODE_SQRT,                1, 1 },
     54    [TGSI_OPCODE_FRC]          = { GEN6_OPCODE_FRC,                 1, 1 },
     55    [TGSI_OPCODE_FLR]          = { GEN6_OPCODE_RNDD,                1, 1 },
     56    [TGSI_OPCODE_ROUND]        = { GEN6_OPCODE_RNDE,                1, 1 },
     57    [TGSI_OPCODE_EX2]          = { TOY_OPCODE_EXP,                 1, 1 },
     58    [TGSI_OPCODE_LG2]          = { TOY_OPCODE_LOG,                 1, 1 },
     59    [TGSI_OPCODE_POW]          = { TOY_OPCODE_POW,                 1, 2 },
     60    [TGSI_OPCODE_DPH]          = { GEN6_OPCODE_DPH,                 1, 2 },
     61    [TGSI_OPCODE_COS]          = { TOY_OPCODE_COS,                 1, 1 },
     62    [TGSI_OPCODE_KILL]         = { TOY_OPCODE_KIL,                 0, 0 },
     63    [TGSI_OPCODE_SIN]          = { TOY_OPCODE_SIN,                 1, 1 },
     64    [TGSI_OPCODE_ARR]          = { GEN6_OPCODE_RNDZ,                1, 1 },
     65    [TGSI_OPCODE_DP2]          = { GEN6_OPCODE_DP2,                 1, 2 },
     66    [TGSI_OPCODE_IF]           = { GEN6_OPCODE_IF,                  0, 1 },
     67    [TGSI_OPCODE_UIF]          = { GEN6_OPCODE_IF,                  0, 1 },
     68    [TGSI_OPCODE_ELSE]         = { GEN6_OPCODE_ELSE,                0, 0 },
     69    [TGSI_OPCODE_ENDIF]        = { GEN6_OPCODE_ENDIF,               0, 0 },
     70    [TGSI_OPCODE_I2F]          = { GEN6_OPCODE_MOV,                 1, 1 },
     71    [TGSI_OPCODE_NOT]          = { GEN6_OPCODE_NOT,                 1, 1 },
     72    [TGSI_OPCODE_TRUNC]        = { GEN6_OPCODE_RNDZ,                1, 1 },
     73    [TGSI_OPCODE_SHL]          = { GEN6_OPCODE_SHL,                 1, 2 },
     74    [TGSI_OPCODE_AND]          = { GEN6_OPCODE_AND,                 1, 2 },
     75    [TGSI_OPCODE_OR]           = { GEN6_OPCODE_OR,                  1, 2 },
     76    [TGSI_OPCODE_MOD]          = { TOY_OPCODE_INT_DIV_REMAINDER,   1, 2 },
     77    [TGSI_OPCODE_XOR]          = { GEN6_OPCODE_XOR,                 1, 2 },
     78    [TGSI_OPCODE_EMIT]         = { TOY_OPCODE_EMIT,                0, 0 },
     79    [TGSI_OPCODE_ENDPRIM]      = { TOY_OPCODE_ENDPRIM,             0, 0 },
     80    [TGSI_OPCODE_NOP]          = { GEN6_OPCODE_NOP,                 0, 0 },
     81    [TGSI_OPCODE_KILL_IF]      = { TOY_OPCODE_KIL,                 0, 1 },
     82    [TGSI_OPCODE_END]          = { GEN6_OPCODE_NOP,                 0, 0 },
     83    [TGSI_OPCODE_F2I]          = { GEN6_OPCODE_MOV,                 1, 1 },
     84    [TGSI_OPCODE_IDIV]         = { TOY_OPCODE_INT_DIV_QUOTIENT,    1, 2 },
     85    [TGSI_OPCODE_IMAX]         = { GEN6_OPCODE_SEL,                 1, 2 },
     86    [TGSI_OPCODE_IMIN]         = { GEN6_OPCODE_SEL,                 1, 2 },
     87    [TGSI_OPCODE_INEG]         = { GEN6_OPCODE_MOV,                 1, 1 },
     88    [TGSI_OPCODE_ISHR]         = { GEN6_OPCODE_ASR,                 1, 2 },
     89    [TGSI_OPCODE_F2U]          = { GEN6_OPCODE_MOV,                 1, 1 },
     90    [TGSI_OPCODE_U2F]          = { GEN6_OPCODE_MOV,                 1, 1 },
     91    [TGSI_OPCODE_UADD]         = { GEN6_OPCODE_ADD,                 1, 2 },
     92    [TGSI_OPCODE_UDIV]         = { TOY_OPCODE_INT_DIV_QUOTIENT,    1, 2 },
     93    /* a later pass will move src[2] to accumulator */
     94    [TGSI_OPCODE_UMAD]         = { GEN6_OPCODE_MAC,                 1, 3 },
     95    [TGSI_OPCODE_UMAX]         = { GEN6_OPCODE_SEL,                 1, 2 },
     96    [TGSI_OPCODE_UMIN]         = { GEN6_OPCODE_SEL,                 1, 2 },
     97    [TGSI_OPCODE_UMOD]         = { TOY_OPCODE_INT_DIV_REMAINDER,   1, 2 },
     98    [TGSI_OPCODE_UMUL]         = { GEN6_OPCODE_MUL,                 1, 2 },
     99    [TGSI_OPCODE_USHR]         = { GEN6_OPCODE_SHR,                 1, 2 },
    100    [TGSI_OPCODE_UARL]         = { GEN6_OPCODE_MOV,                 1, 1 },
    101    [TGSI_OPCODE_IABS]         = { GEN6_OPCODE_MOV,                 1, 1 },
    102 };
    103 
    104 static void
    105 aos_simple(struct toy_compiler *tc,
    106            const struct tgsi_full_instruction *tgsi_inst,
    107            struct toy_dst *dst,
    108            struct toy_src *src)
    109 {
    110    struct toy_inst *inst;
    111    int opcode;
    112    int cond_modifier = GEN6_COND_NONE;
    113    int num_dst = tgsi_inst->Instruction.NumDstRegs;
    114    int num_src = tgsi_inst->Instruction.NumSrcRegs;
    115    int i;
    116 
    117    opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode;
    118    assert(num_dst == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_dst);
    119    assert(num_src == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_src);
    120    if (!opcode) {
    121       assert(!"invalid aos_simple() call");
    122       return;
    123    }
    124 
    125    /* no need to emit nop */
    126    if (opcode == GEN6_OPCODE_NOP)
    127       return;
    128 
    129    inst = tc_add(tc);
    130    if (!inst)
    131       return;
    132 
    133    inst->opcode = opcode;
    134 
    135    switch (tgsi_inst->Instruction.Opcode) {
    136    case TGSI_OPCODE_MIN:
    137    case TGSI_OPCODE_IMIN:
    138    case TGSI_OPCODE_UMIN:
    139       cond_modifier = GEN6_COND_L;
    140       break;
    141    case TGSI_OPCODE_MAX:
    142    case TGSI_OPCODE_IMAX:
    143    case TGSI_OPCODE_UMAX:
    144       cond_modifier = GEN6_COND_GE;
    145       break;
    146    case TGSI_OPCODE_IABS:
    147       src[0] = tsrc_absolute(src[0]);
    148       break;
    149    case TGSI_OPCODE_IF:
    150       cond_modifier = GEN6_COND_NZ;
    151       num_src = 2;
    152       assert(src[0].type == TOY_TYPE_F);
    153       src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
    154       src[1] = tsrc_imm_f(0.0f);
    155       break;
    156    case TGSI_OPCODE_UIF:
    157       cond_modifier = GEN6_COND_NZ;
    158       num_src = 2;
    159       assert(src[0].type == TOY_TYPE_UD);
    160       src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
    161       src[1] = tsrc_imm_d(0);
    162       break;
    163    case TGSI_OPCODE_INEG:
    164       src[0] = tsrc_negate(src[0]);
    165       break;
    166    case TGSI_OPCODE_RCP:
    167    case TGSI_OPCODE_RSQ:
    168    case TGSI_OPCODE_EX2:
    169    case TGSI_OPCODE_LG2:
    170    case TGSI_OPCODE_COS:
    171    case TGSI_OPCODE_SIN:
    172       src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
    173       break;
    174    case TGSI_OPCODE_POW:
    175       src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
    176       src[1] = tsrc_swizzle1(src[1], TOY_SWIZZLE_X);
    177       break;
    178    }
    179 
    180    inst->cond_modifier = cond_modifier;
    181 
    182    if (num_dst) {
    183       assert(num_dst == 1);
    184       inst->dst = dst[0];
    185    }
    186 
    187    assert(num_src <= ARRAY_SIZE(inst->src));
    188    for (i = 0; i < num_src; i++)
    189       inst->src[i] = src[i];
    190 }
    191 
    192 static void
    193 aos_set_on_cond(struct toy_compiler *tc,
    194                 const struct tgsi_full_instruction *tgsi_inst,
    195                 struct toy_dst *dst,
    196                 struct toy_src *src)
    197 {
    198    struct toy_inst *inst;
    199    int cond;
    200    struct toy_src zero, one;
    201 
    202    switch (tgsi_inst->Instruction.Opcode) {
    203    case TGSI_OPCODE_SLT:
    204    case TGSI_OPCODE_ISLT:
    205    case TGSI_OPCODE_USLT:
    206    case TGSI_OPCODE_FSLT:
    207       cond = GEN6_COND_L;
    208       break;
    209    case TGSI_OPCODE_SGE:
    210    case TGSI_OPCODE_ISGE:
    211    case TGSI_OPCODE_USGE:
    212    case TGSI_OPCODE_FSGE:
    213       cond = GEN6_COND_GE;
    214       break;
    215    case TGSI_OPCODE_SEQ:
    216    case TGSI_OPCODE_USEQ:
    217    case TGSI_OPCODE_FSEQ:
    218       cond = GEN6_COND_Z;
    219       break;
    220    case TGSI_OPCODE_SGT:
    221       cond = GEN6_COND_G;
    222       break;
    223    case TGSI_OPCODE_SLE:
    224       cond = GEN6_COND_LE;
    225       break;
    226    case TGSI_OPCODE_SNE:
    227    case TGSI_OPCODE_USNE:
    228    case TGSI_OPCODE_FSNE:
    229       cond = GEN6_COND_NZ;
    230       break;
    231    default:
    232       assert(!"invalid aos_set_on_cond() call");
    233       return;
    234    }
    235 
    236    /* note that for integer versions, all bits are set */
    237    switch (dst[0].type) {
    238    case TOY_TYPE_F:
    239    default:
    240       zero = tsrc_imm_f(0.0f);
    241       one = tsrc_imm_f(1.0f);
    242       break;
    243    case TOY_TYPE_D:
    244       zero = tsrc_imm_d(0);
    245       one = tsrc_imm_d(-1);
    246       break;
    247    case TOY_TYPE_UD:
    248       zero = tsrc_imm_ud(0);
    249       one = tsrc_imm_ud(~0);
    250       break;
    251    }
    252 
    253    tc_MOV(tc, dst[0], zero);
    254    tc_CMP(tc, tdst_null(), src[0], src[1], cond);
    255    inst = tc_MOV(tc, dst[0], one);
    256    inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
    257 }
    258 
    259 static void
    260 aos_compare(struct toy_compiler *tc,
    261             const struct tgsi_full_instruction *tgsi_inst,
    262             struct toy_dst *dst,
    263             struct toy_src *src)
    264 {
    265    struct toy_inst *inst;
    266    struct toy_src zero;
    267 
    268    switch (tgsi_inst->Instruction.Opcode) {
    269    case TGSI_OPCODE_CMP:
    270       zero = tsrc_imm_f(0.0f);
    271       break;
    272    case TGSI_OPCODE_UCMP:
    273       zero = tsrc_imm_ud(0);
    274       break;
    275    default:
    276       assert(!"invalid aos_compare() call");
    277       return;
    278    }
    279 
    280    tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_L);
    281    inst = tc_SEL(tc, dst[0], src[1], src[2], GEN6_COND_NONE);
    282    inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
    283 }
    284 
    285 static void
    286 aos_set_sign(struct toy_compiler *tc,
    287              const struct tgsi_full_instruction *tgsi_inst,
    288              struct toy_dst *dst,
    289              struct toy_src *src)
    290 {
    291    struct toy_inst *inst;
    292    struct toy_src zero, one, neg_one;
    293 
    294    switch (tgsi_inst->Instruction.Opcode) {
    295    case TGSI_OPCODE_SSG:
    296       zero = tsrc_imm_f(0.0f);
    297       one = tsrc_imm_f(1.0f);
    298       neg_one = tsrc_imm_f(-1.0f);
    299       break;
    300    case TGSI_OPCODE_ISSG:
    301       zero = tsrc_imm_d(0);
    302       one = tsrc_imm_d(1);
    303       neg_one = tsrc_imm_d(-1);
    304       break;
    305    default:
    306       assert(!"invalid aos_set_sign() call");
    307       return;
    308    }
    309 
    310    tc_MOV(tc, dst[0], zero);
    311 
    312    tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_G);
    313    inst = tc_MOV(tc, dst[0], one);
    314    inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
    315 
    316    tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_L);
    317    inst = tc_MOV(tc, dst[0], neg_one);
    318    inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
    319 }
    320 
    321 static void
    322 aos_tex(struct toy_compiler *tc,
    323         const struct tgsi_full_instruction *tgsi_inst,
    324         struct toy_dst *dst,
    325         struct toy_src *src)
    326 {
    327    struct toy_inst *inst;
    328    enum toy_opcode opcode;
    329    int i;
    330 
    331    switch (tgsi_inst->Instruction.Opcode) {
    332    case TGSI_OPCODE_TEX:
    333       opcode = TOY_OPCODE_TGSI_TEX;
    334       break;
    335    case TGSI_OPCODE_TXD:
    336       opcode = TOY_OPCODE_TGSI_TXD;
    337       break;
    338    case TGSI_OPCODE_TXP:
    339       opcode = TOY_OPCODE_TGSI_TXP;
    340       break;
    341    case TGSI_OPCODE_TXB:
    342       opcode = TOY_OPCODE_TGSI_TXB;
    343       break;
    344    case TGSI_OPCODE_TXL:
    345       opcode = TOY_OPCODE_TGSI_TXL;
    346       break;
    347    case TGSI_OPCODE_TXF:
    348       opcode = TOY_OPCODE_TGSI_TXF;
    349       break;
    350    case TGSI_OPCODE_TXQ:
    351       opcode = TOY_OPCODE_TGSI_TXQ;
    352       break;
    353    case TGSI_OPCODE_TXQ_LZ:
    354       opcode = TOY_OPCODE_TGSI_TXQ_LZ;
    355       break;
    356    case TGSI_OPCODE_TEX2:
    357       opcode = TOY_OPCODE_TGSI_TEX2;
    358       break;
    359    case TGSI_OPCODE_TXB2:
    360       opcode = TOY_OPCODE_TGSI_TXB2;
    361       break;
    362    case TGSI_OPCODE_TXL2:
    363       opcode = TOY_OPCODE_TGSI_TXL2;
    364       break;
    365    default:
    366       assert(!"unsupported texturing opcode");
    367       return;
    368       break;
    369    }
    370 
    371    assert(tgsi_inst->Instruction.Texture);
    372 
    373    inst = tc_add(tc);
    374    inst->opcode = opcode;
    375    inst->tex.target = tgsi_inst->Texture.Texture;
    376 
    377    assert(tgsi_inst->Instruction.NumSrcRegs <= ARRAY_SIZE(inst->src));
    378    assert(tgsi_inst->Instruction.NumDstRegs == 1);
    379 
    380    inst->dst = dst[0];
    381    for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
    382       inst->src[i] = src[i];
    383 
    384    for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++)
    385       tc_fail(tc, "texelFetchOffset unsupported");
    386 }
    387 
    388 static void
    389 aos_sample(struct toy_compiler *tc,
    390            const struct tgsi_full_instruction *tgsi_inst,
    391            struct toy_dst *dst,
    392            struct toy_src *src)
    393 {
    394    struct toy_inst *inst;
    395    enum toy_opcode opcode;
    396    int i;
    397 
    398    assert(!"sampling untested");
    399 
    400    switch (tgsi_inst->Instruction.Opcode) {
    401    case TGSI_OPCODE_SAMPLE:
    402       opcode = TOY_OPCODE_TGSI_SAMPLE;
    403       break;
    404    case TGSI_OPCODE_SAMPLE_I:
    405       opcode = TOY_OPCODE_TGSI_SAMPLE_I;
    406       break;
    407    case TGSI_OPCODE_SAMPLE_I_MS:
    408       opcode = TOY_OPCODE_TGSI_SAMPLE_I_MS;
    409       break;
    410    case TGSI_OPCODE_SAMPLE_B:
    411       opcode = TOY_OPCODE_TGSI_SAMPLE_B;
    412       break;
    413    case TGSI_OPCODE_SAMPLE_C:
    414       opcode = TOY_OPCODE_TGSI_SAMPLE_C;
    415       break;
    416    case TGSI_OPCODE_SAMPLE_C_LZ:
    417       opcode = TOY_OPCODE_TGSI_SAMPLE_C_LZ;
    418       break;
    419    case TGSI_OPCODE_SAMPLE_D:
    420       opcode = TOY_OPCODE_TGSI_SAMPLE_D;
    421       break;
    422    case TGSI_OPCODE_SAMPLE_L:
    423       opcode = TOY_OPCODE_TGSI_SAMPLE_L;
    424       break;
    425    case TGSI_OPCODE_GATHER4:
    426       opcode = TOY_OPCODE_TGSI_GATHER4;
    427       break;
    428    case TGSI_OPCODE_SVIEWINFO:
    429       opcode = TOY_OPCODE_TGSI_SVIEWINFO;
    430       break;
    431    case TGSI_OPCODE_SAMPLE_POS:
    432       opcode = TOY_OPCODE_TGSI_SAMPLE_POS;
    433       break;
    434    case TGSI_OPCODE_SAMPLE_INFO:
    435       opcode = TOY_OPCODE_TGSI_SAMPLE_INFO;
    436       break;
    437    default:
    438       assert(!"unsupported sampling opcode");
    439       return;
    440       break;
    441    }
    442 
    443    inst = tc_add(tc);
    444    inst->opcode = opcode;
    445 
    446    assert(tgsi_inst->Instruction.NumSrcRegs <= ARRAY_SIZE(inst->src));
    447    assert(tgsi_inst->Instruction.NumDstRegs == 1);
    448 
    449    inst->dst = dst[0];
    450    for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
    451       inst->src[i] = src[i];
    452 }
    453 
    454 static void
    455 aos_LIT(struct toy_compiler *tc,
    456         const struct tgsi_full_instruction *tgsi_inst,
    457         struct toy_dst *dst,
    458         struct toy_src *src)
    459 {
    460    struct toy_inst *inst;
    461 
    462    tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XW), tsrc_imm_f(1.0f));
    463 
    464    if (!(dst[0].writemask & TOY_WRITEMASK_YZ))
    465       return;
    466 
    467    tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_YZ), tsrc_imm_f(0.0f));
    468 
    469    tc_CMP(tc, tdst_null(),
    470          tsrc_swizzle1(src[0], TOY_SWIZZLE_X),
    471          tsrc_imm_f(0.0f),
    472          GEN6_COND_G);
    473 
    474    inst = tc_MOV(tc,
    475          tdst_writemask(dst[0], TOY_WRITEMASK_Y),
    476          tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
    477    inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
    478 
    479    /* clamp W to (-128, 128)? */
    480    inst = tc_POW(tc,
    481          tdst_writemask(dst[0], TOY_WRITEMASK_Z),
    482          tsrc_swizzle1(src[0], TOY_SWIZZLE_Y),
    483          tsrc_swizzle1(src[0], TOY_SWIZZLE_W));
    484    inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
    485 }
    486 
    487 static void
    488 aos_EXP(struct toy_compiler *tc,
    489         const struct tgsi_full_instruction *tgsi_inst,
    490         struct toy_dst *dst,
    491         struct toy_src *src)
    492 {
    493    struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
    494 
    495    if (dst[0].writemask & TOY_WRITEMASK_X) {
    496       struct toy_dst tmp =
    497          tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
    498 
    499       tc_RNDD(tc, tmp, src0);
    500 
    501       /* construct the floating point number manually */
    502       tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127));
    503       tc_SHL(tc, tdst_d(tdst_writemask(dst[0], TOY_WRITEMASK_X)),
    504             tsrc_from(tmp), tsrc_imm_d(23));
    505    }
    506 
    507    tc_FRC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src0);
    508    tc_EXP(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0);
    509    tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
    510 }
    511 
    512 static void
    513 aos_LOG(struct toy_compiler *tc,
    514         const struct tgsi_full_instruction *tgsi_inst,
    515         struct toy_dst *dst,
    516         struct toy_src *src)
    517 {
    518    struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
    519 
    520    if (dst[0].writemask & TOY_WRITEMASK_XY) {
    521       struct toy_dst tmp;
    522 
    523       tmp = tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
    524 
    525       /* exponent */
    526       tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0)), tsrc_imm_d(23));
    527       tc_ADD(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X),
    528             tsrc_from(tmp), tsrc_imm_d(-127));
    529 
    530       /* mantissa  */
    531       tc_AND(tc, tmp, tsrc_d(src0), tsrc_imm_d((1 << 23) - 1));
    532       tc_OR(tc, tdst_writemask(tdst_d(dst[0]), TOY_WRITEMASK_Y),
    533             tsrc_from(tmp), tsrc_imm_d(127 << 23));
    534    }
    535 
    536    tc_LOG(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0);
    537    tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
    538 }
    539 
    540 static void
    541 aos_DST(struct toy_compiler *tc,
    542         const struct tgsi_full_instruction *tgsi_inst,
    543         struct toy_dst *dst,
    544         struct toy_src *src)
    545 {
    546    tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X), tsrc_imm_f(1.0f));
    547    tc_MUL(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0], src[1]);
    548    tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src[0]);
    549    tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), src[1]);
    550 }
    551 
    552 static void
    553 aos_LRP(struct toy_compiler *tc,
    554         const struct tgsi_full_instruction *tgsi_inst,
    555         struct toy_dst *dst,
    556         struct toy_src *src)
    557 {
    558    struct toy_dst tmp = tc_alloc_tmp(tc);
    559 
    560    tc_ADD(tc, tmp, tsrc_negate(src[0]), tsrc_imm_f(1.0f));
    561    tc_MUL(tc, tmp, tsrc_from(tmp), src[2]);
    562    tc_MAC(tc, dst[0], src[0], src[1], tsrc_from(tmp));
    563 }
    564 
    565 static void
    566 aos_DP2A(struct toy_compiler *tc,
    567          const struct tgsi_full_instruction *tgsi_inst,
    568          struct toy_dst *dst,
    569          struct toy_src *src)
    570 {
    571    struct toy_dst tmp = tc_alloc_tmp(tc);
    572 
    573    assert(!"DP2A untested");
    574 
    575    tc_DP2(tc, tmp, src[0], src[1]);
    576    tc_ADD(tc, dst[0], tsrc_swizzle1(tsrc_from(tmp), TOY_SWIZZLE_X), src[2]);
    577 }
    578 
    579 static void
    580 aos_CLAMP(struct toy_compiler *tc,
    581           const struct tgsi_full_instruction *tgsi_inst,
    582           struct toy_dst *dst,
    583           struct toy_src *src)
    584 {
    585    assert(!"CLAMP untested");
    586 
    587    tc_SEL(tc, dst[0], src[0], src[1], GEN6_COND_GE);
    588    tc_SEL(tc, dst[0], src[2], tsrc_from(dst[0]), GEN6_COND_L);
    589 }
    590 
    591 static void
    592 aos_XPD(struct toy_compiler *tc,
    593         const struct tgsi_full_instruction *tgsi_inst,
    594         struct toy_dst *dst,
    595         struct toy_src *src)
    596 {
    597    struct toy_dst tmp = tc_alloc_tmp(tc);
    598 
    599    tc_MUL(tc, tdst_writemask(tmp, TOY_WRITEMASK_XYZ),
    600          tsrc_swizzle(src[0], TOY_SWIZZLE_Z, TOY_SWIZZLE_X,
    601                               TOY_SWIZZLE_Y, TOY_SWIZZLE_W),
    602          tsrc_swizzle(src[1], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z,
    603                               TOY_SWIZZLE_X, TOY_SWIZZLE_W));
    604 
    605    tc_MAC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ),
    606          tsrc_swizzle(src[0], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z,
    607                               TOY_SWIZZLE_X, TOY_SWIZZLE_W),
    608          tsrc_swizzle(src[1], TOY_SWIZZLE_Z, TOY_SWIZZLE_X,
    609                               TOY_SWIZZLE_Y, TOY_SWIZZLE_W),
    610          tsrc_negate(tsrc_from(tmp)));
    611 
    612    tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W),
    613          tsrc_imm_f(1.0f));
    614 }
    615 
    616 static void
    617 aos_PK2H(struct toy_compiler *tc,
    618          const struct tgsi_full_instruction *tgsi_inst,
    619          struct toy_dst *dst,
    620          struct toy_src *src)
    621 {
    622    const struct toy_src h1 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
    623    const struct toy_src h2 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_Y));
    624    struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc));
    625 
    626    assert(!"PK2H untested");
    627 
    628    tc_SHL(tc, tmp, h2, tsrc_imm_ud(16));
    629    tc_OR(tc, tdst_ud(dst[0]), h1, tsrc_from(tmp));
    630 }
    631 
    632 static void
    633 aos_UP2H(struct toy_compiler *tc,
    634          const struct tgsi_full_instruction *tgsi_inst,
    635          struct toy_dst *dst,
    636          struct toy_src *src)
    637 {
    638    assert(!"UP2H untested");
    639 
    640    tc_AND(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_XZ),
    641          tsrc_ud(src[0]), tsrc_imm_ud(0xffff));
    642    tc_SHR(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_YW),
    643          tsrc_ud(src[0]), tsrc_imm_ud(16));
    644 }
    645 
    646 static void
    647 aos_SCS(struct toy_compiler *tc,
    648         const struct tgsi_full_instruction *tgsi_inst,
    649         struct toy_dst *dst,
    650         struct toy_src *src)
    651 {
    652    assert(!"SCS untested");
    653 
    654    tc_add1(tc, TOY_OPCODE_COS,
    655          tdst_writemask(dst[0], TOY_WRITEMASK_X), src[0]);
    656 
    657    tc_add1(tc, TOY_OPCODE_SIN,
    658          tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0]);
    659 
    660    tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), tsrc_imm_f(0.0f));
    661    tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
    662 }
    663 
    664 static void
    665 aos_DIV(struct toy_compiler *tc,
    666         const struct tgsi_full_instruction *tgsi_inst,
    667         struct toy_dst *dst,
    668         struct toy_src *src)
    669 {
    670    struct toy_dst tmp = tc_alloc_tmp(tc);
    671 
    672    assert(!"DIV untested");
    673 
    674    tc_INV(tc, tmp, src[1]);
    675    tc_MUL(tc, dst[0], src[0], tsrc_from(tmp));
    676 }
    677 
    678 static void
    679 aos_BRK(struct toy_compiler *tc,
    680         const struct tgsi_full_instruction *tgsi_inst,
    681         struct toy_dst *dst,
    682         struct toy_src *src)
    683 {
    684    tc_add0(tc, GEN6_OPCODE_BREAK);
    685 }
    686 
    687 static void
    688 aos_CEIL(struct toy_compiler *tc,
    689          const struct tgsi_full_instruction *tgsi_inst,
    690          struct toy_dst *dst,
    691          struct toy_src *src)
    692 {
    693    struct toy_dst tmp = tc_alloc_tmp(tc);
    694 
    695    tc_RNDD(tc, tmp, tsrc_negate(src[0]));
    696    tc_MOV(tc, dst[0], tsrc_negate(tsrc_from(tmp)));
    697 }
    698 
    699 static void
    700 aos_SAD(struct toy_compiler *tc,
    701         const struct tgsi_full_instruction *tgsi_inst,
    702         struct toy_dst *dst,
    703         struct toy_src *src)
    704 {
    705    struct toy_dst tmp = tc_alloc_tmp(tc);
    706 
    707    assert(!"SAD untested");
    708 
    709    tc_ADD(tc, tmp, src[0], tsrc_negate(src[1]));
    710    tc_ADD(tc, dst[0], tsrc_absolute(tsrc_from(tmp)), src[2]);
    711 }
    712 
    713 static void
    714 aos_CONT(struct toy_compiler *tc,
    715          const struct tgsi_full_instruction *tgsi_inst,
    716          struct toy_dst *dst,
    717          struct toy_src *src)
    718 {
    719    tc_add0(tc, GEN6_OPCODE_CONT);
    720 }
    721 
    722 static void
    723 aos_BGNLOOP(struct toy_compiler *tc,
    724             const struct tgsi_full_instruction *tgsi_inst,
    725             struct toy_dst *dst,
    726             struct toy_src *src)
    727 {
    728    struct toy_inst *inst;
    729 
    730    inst = tc_add0(tc, TOY_OPCODE_DO);
    731    /* this is just a marker */
    732    inst->marker = true;
    733 }
    734 
    735 static void
    736 aos_ENDLOOP(struct toy_compiler *tc,
    737             const struct tgsi_full_instruction *tgsi_inst,
    738             struct toy_dst *dst,
    739             struct toy_src *src)
    740 {
    741    tc_add0(tc, GEN6_OPCODE_WHILE);
    742 }
    743 
    744 static void
    745 aos_unsupported(struct toy_compiler *tc,
    746                 const struct tgsi_full_instruction *tgsi_inst,
    747                 struct toy_dst *dst,
    748                 struct toy_src *src)
    749 {
    750    const char *name = tgsi_get_opcode_name(tgsi_inst->Instruction.Opcode);
    751 
    752    ilo_warn("unsupported TGSI opcode: TGSI_OPCODE_%s\n", name);
    753 
    754    tc_fail(tc, "unsupported TGSI instruction");
    755 }
    756 
    757 static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = {
    758    [TGSI_OPCODE_ARL]          = aos_simple,
    759    [TGSI_OPCODE_MOV]          = aos_simple,
    760    [TGSI_OPCODE_LIT]          = aos_LIT,
    761    [TGSI_OPCODE_RCP]          = aos_simple,
    762    [TGSI_OPCODE_RSQ]          = aos_simple,
    763    [TGSI_OPCODE_EXP]          = aos_EXP,
    764    [TGSI_OPCODE_LOG]          = aos_LOG,
    765    [TGSI_OPCODE_MUL]          = aos_simple,
    766    [TGSI_OPCODE_ADD]          = aos_simple,
    767    [TGSI_OPCODE_DP3]          = aos_simple,
    768    [TGSI_OPCODE_DP4]          = aos_simple,
    769    [TGSI_OPCODE_DST]          = aos_DST,
    770    [TGSI_OPCODE_MIN]          = aos_simple,
    771    [TGSI_OPCODE_MAX]          = aos_simple,
    772    [TGSI_OPCODE_SLT]          = aos_set_on_cond,
    773    [TGSI_OPCODE_SGE]          = aos_set_on_cond,
    774    [TGSI_OPCODE_MAD]          = aos_simple,
    775    [TGSI_OPCODE_LRP]          = aos_LRP,
    776    [TGSI_OPCODE_SQRT]         = aos_simple,
    777    [TGSI_OPCODE_DP2A]         = aos_DP2A,
    778    [TGSI_OPCODE_FRC]          = aos_simple,
    779    [TGSI_OPCODE_CLAMP]        = aos_CLAMP,
    780    [TGSI_OPCODE_FLR]          = aos_simple,
    781    [TGSI_OPCODE_ROUND]        = aos_simple,
    782    [TGSI_OPCODE_EX2]          = aos_simple,
    783    [TGSI_OPCODE_LG2]          = aos_simple,
    784    [TGSI_OPCODE_POW]          = aos_simple,
    785    [TGSI_OPCODE_XPD]          = aos_XPD,
    786    [TGSI_OPCODE_DPH]          = aos_simple,
    787    [TGSI_OPCODE_COS]          = aos_simple,
    788    [TGSI_OPCODE_DDX]          = aos_unsupported,
    789    [TGSI_OPCODE_DDY]          = aos_unsupported,
    790    [TGSI_OPCODE_KILL]         = aos_simple,
    791    [TGSI_OPCODE_PK2H]         = aos_PK2H,
    792    [TGSI_OPCODE_PK2US]        = aos_unsupported,
    793    [TGSI_OPCODE_PK4B]         = aos_unsupported,
    794    [TGSI_OPCODE_PK4UB]        = aos_unsupported,
    795    [TGSI_OPCODE_SEQ]          = aos_set_on_cond,
    796    [TGSI_OPCODE_SGT]          = aos_set_on_cond,
    797    [TGSI_OPCODE_SIN]          = aos_simple,
    798    [TGSI_OPCODE_SLE]          = aos_set_on_cond,
    799    [TGSI_OPCODE_SNE]          = aos_set_on_cond,
    800    [TGSI_OPCODE_TEX]          = aos_tex,
    801    [TGSI_OPCODE_TXD]          = aos_tex,
    802    [TGSI_OPCODE_TXP]          = aos_tex,
    803    [TGSI_OPCODE_UP2H]         = aos_UP2H,
    804    [TGSI_OPCODE_UP2US]        = aos_unsupported,
    805    [TGSI_OPCODE_UP4B]         = aos_unsupported,
    806    [TGSI_OPCODE_UP4UB]        = aos_unsupported,
    807    [TGSI_OPCODE_ARR]          = aos_simple,
    808    [TGSI_OPCODE_CAL]          = aos_unsupported,
    809    [TGSI_OPCODE_RET]          = aos_unsupported,
    810    [TGSI_OPCODE_SSG]          = aos_set_sign,
    811    [TGSI_OPCODE_CMP]          = aos_compare,
    812    [TGSI_OPCODE_SCS]          = aos_SCS,
    813    [TGSI_OPCODE_TXB]          = aos_tex,
    814    [TGSI_OPCODE_DIV]          = aos_DIV,
    815    [TGSI_OPCODE_DP2]          = aos_simple,
    816    [TGSI_OPCODE_TXL]          = aos_tex,
    817    [TGSI_OPCODE_BRK]          = aos_BRK,
    818    [TGSI_OPCODE_IF]           = aos_simple,
    819    [TGSI_OPCODE_UIF]          = aos_simple,
    820    [TGSI_OPCODE_ELSE]         = aos_simple,
    821    [TGSI_OPCODE_ENDIF]        = aos_simple,
    822    [TGSI_OPCODE_PUSHA]        = aos_unsupported,
    823    [TGSI_OPCODE_POPA]         = aos_unsupported,
    824    [TGSI_OPCODE_CEIL]         = aos_CEIL,
    825    [TGSI_OPCODE_I2F]          = aos_simple,
    826    [TGSI_OPCODE_NOT]          = aos_simple,
    827    [TGSI_OPCODE_TRUNC]        = aos_simple,
    828    [TGSI_OPCODE_SHL]          = aos_simple,
    829    [TGSI_OPCODE_AND]          = aos_simple,
    830    [TGSI_OPCODE_OR]           = aos_simple,
    831    [TGSI_OPCODE_MOD]          = aos_simple,
    832    [TGSI_OPCODE_XOR]          = aos_simple,
    833    [TGSI_OPCODE_SAD]          = aos_SAD,
    834    [TGSI_OPCODE_TXF]          = aos_tex,
    835    [TGSI_OPCODE_TXQ]          = aos_tex,
    836    [TGSI_OPCODE_CONT]         = aos_CONT,
    837    [TGSI_OPCODE_EMIT]         = aos_simple,
    838    [TGSI_OPCODE_ENDPRIM]      = aos_simple,
    839    [TGSI_OPCODE_BGNLOOP]      = aos_BGNLOOP,
    840    [TGSI_OPCODE_BGNSUB]       = aos_unsupported,
    841    [TGSI_OPCODE_ENDLOOP]      = aos_ENDLOOP,
    842    [TGSI_OPCODE_ENDSUB]       = aos_unsupported,
    843    [TGSI_OPCODE_TXQ_LZ]       = aos_tex,
    844    [TGSI_OPCODE_NOP]          = aos_simple,
    845    [TGSI_OPCODE_FSEQ]         = aos_set_on_cond,
    846    [TGSI_OPCODE_FSGE]         = aos_set_on_cond,
    847    [TGSI_OPCODE_FSLT]         = aos_set_on_cond,
    848    [TGSI_OPCODE_FSNE]         = aos_set_on_cond,
    849    [TGSI_OPCODE_CALLNZ]       = aos_unsupported,
    850    [TGSI_OPCODE_BREAKC]       = aos_unsupported,
    851    [TGSI_OPCODE_KILL_IF]      = aos_simple,
    852    [TGSI_OPCODE_END]          = aos_simple,
    853    [TGSI_OPCODE_F2I]          = aos_simple,
    854    [TGSI_OPCODE_IDIV]         = aos_simple,
    855    [TGSI_OPCODE_IMAX]         = aos_simple,
    856    [TGSI_OPCODE_IMIN]         = aos_simple,
    857    [TGSI_OPCODE_INEG]         = aos_simple,
    858    [TGSI_OPCODE_ISGE]         = aos_set_on_cond,
    859    [TGSI_OPCODE_ISHR]         = aos_simple,
    860    [TGSI_OPCODE_ISLT]         = aos_set_on_cond,
    861    [TGSI_OPCODE_F2U]          = aos_simple,
    862    [TGSI_OPCODE_U2F]          = aos_simple,
    863    [TGSI_OPCODE_UADD]         = aos_simple,
    864    [TGSI_OPCODE_UDIV]         = aos_simple,
    865    [TGSI_OPCODE_UMAD]         = aos_simple,
    866    [TGSI_OPCODE_UMAX]         = aos_simple,
    867    [TGSI_OPCODE_UMIN]         = aos_simple,
    868    [TGSI_OPCODE_UMOD]         = aos_simple,
    869    [TGSI_OPCODE_UMUL]         = aos_simple,
    870    [TGSI_OPCODE_USEQ]         = aos_set_on_cond,
    871    [TGSI_OPCODE_USGE]         = aos_set_on_cond,
    872    [TGSI_OPCODE_USHR]         = aos_simple,
    873    [TGSI_OPCODE_USLT]         = aos_set_on_cond,
    874    [TGSI_OPCODE_USNE]         = aos_set_on_cond,
    875    [TGSI_OPCODE_SWITCH]       = aos_unsupported,
    876    [TGSI_OPCODE_CASE]         = aos_unsupported,
    877    [TGSI_OPCODE_DEFAULT]      = aos_unsupported,
    878    [TGSI_OPCODE_ENDSWITCH]    = aos_unsupported,
    879    [TGSI_OPCODE_SAMPLE]       = aos_sample,
    880    [TGSI_OPCODE_SAMPLE_I]     = aos_sample,
    881    [TGSI_OPCODE_SAMPLE_I_MS]  = aos_sample,
    882    [TGSI_OPCODE_SAMPLE_B]     = aos_sample,
    883    [TGSI_OPCODE_SAMPLE_C]     = aos_sample,
    884    [TGSI_OPCODE_SAMPLE_C_LZ]  = aos_sample,
    885    [TGSI_OPCODE_SAMPLE_D]     = aos_sample,
    886    [TGSI_OPCODE_SAMPLE_L]     = aos_sample,
    887    [TGSI_OPCODE_GATHER4]      = aos_sample,
    888    [TGSI_OPCODE_SVIEWINFO]    = aos_sample,
    889    [TGSI_OPCODE_SAMPLE_POS]   = aos_sample,
    890    [TGSI_OPCODE_SAMPLE_INFO]  = aos_sample,
    891    [TGSI_OPCODE_UARL]         = aos_simple,
    892    [TGSI_OPCODE_UCMP]         = aos_compare,
    893    [TGSI_OPCODE_IABS]         = aos_simple,
    894    [TGSI_OPCODE_ISSG]         = aos_set_sign,
    895    [TGSI_OPCODE_LOAD]         = aos_unsupported,
    896    [TGSI_OPCODE_STORE]        = aos_unsupported,
    897    [TGSI_OPCODE_MFENCE]       = aos_unsupported,
    898    [TGSI_OPCODE_LFENCE]       = aos_unsupported,
    899    [TGSI_OPCODE_SFENCE]       = aos_unsupported,
    900    [TGSI_OPCODE_BARRIER]      = aos_unsupported,
    901    [TGSI_OPCODE_ATOMUADD]     = aos_unsupported,
    902    [TGSI_OPCODE_ATOMXCHG]     = aos_unsupported,
    903    [TGSI_OPCODE_ATOMCAS]      = aos_unsupported,
    904    [TGSI_OPCODE_ATOMAND]      = aos_unsupported,
    905    [TGSI_OPCODE_ATOMOR]       = aos_unsupported,
    906    [TGSI_OPCODE_ATOMXOR]      = aos_unsupported,
    907    [TGSI_OPCODE_ATOMUMIN]     = aos_unsupported,
    908    [TGSI_OPCODE_ATOMUMAX]     = aos_unsupported,
    909    [TGSI_OPCODE_ATOMIMIN]     = aos_unsupported,
    910    [TGSI_OPCODE_ATOMIMAX]     = aos_unsupported,
    911    [TGSI_OPCODE_TEX2]         = aos_tex,
    912    [TGSI_OPCODE_TXB2]         = aos_tex,
    913    [TGSI_OPCODE_TXL2]         = aos_tex,
    914 };
    915 
    916 static void
    917 soa_passthrough(struct toy_compiler *tc,
    918                 const struct tgsi_full_instruction *tgsi_inst,
    919                 struct toy_dst *dst_,
    920                 struct toy_src *src_)
    921 {
    922    const toy_tgsi_translate translate =
    923       aos_translate_table[tgsi_inst->Instruction.Opcode];
    924 
    925    translate(tc, tgsi_inst, dst_, src_);
    926 }
    927 
    928 static void
    929 soa_per_channel(struct toy_compiler *tc,
    930                 const struct tgsi_full_instruction *tgsi_inst,
    931                 struct toy_dst *dst_,
    932                 struct toy_src *src_)
    933 {
    934    struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS][4];
    935    struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
    936    int i, ch;
    937 
    938    for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
    939       tdst_transpose(dst_[i], dst[i]);
    940    for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
    941       tsrc_transpose(src_[i], src[i]);
    942 
    943    /* emit the same instruction four times for the four channels */
    944    for (ch = 0; ch < 4; ch++) {
    945       struct toy_dst aos_dst[TGSI_FULL_MAX_DST_REGISTERS];
    946       struct toy_src aos_src[TGSI_FULL_MAX_SRC_REGISTERS];
    947 
    948       for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
    949          aos_dst[i] = dst[i][ch];
    950       for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
    951          aos_src[i] = src[i][ch];
    952 
    953       aos_translate_table[tgsi_inst->Instruction.Opcode](tc,
    954             tgsi_inst, aos_dst, aos_src);
    955    }
    956 }
    957 
    958 static void
    959 soa_scalar_replicate(struct toy_compiler *tc,
    960                      const struct tgsi_full_instruction *tgsi_inst,
    961                      struct toy_dst *dst_,
    962                      struct toy_src *src_)
    963 {
    964    struct toy_dst dst0[4], tmp;
    965    struct toy_src srcx[TGSI_FULL_MAX_SRC_REGISTERS];
    966    int opcode, i;
    967 
    968    assert(tgsi_inst->Instruction.NumDstRegs == 1);
    969 
    970    tdst_transpose(dst_[0], dst0);
    971    for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
    972       struct toy_src tmp[4];
    973 
    974       tsrc_transpose(src_[i], tmp);
    975       /* only the X channels */
    976       srcx[i] = tmp[0];
    977    }
    978 
    979    tmp = tc_alloc_tmp(tc);
    980 
    981    opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode;
    982    assert(opcode);
    983 
    984    switch (tgsi_inst->Instruction.Opcode) {
    985    case TGSI_OPCODE_RCP:
    986    case TGSI_OPCODE_RSQ:
    987    case TGSI_OPCODE_SQRT:
    988    case TGSI_OPCODE_EX2:
    989    case TGSI_OPCODE_LG2:
    990    case TGSI_OPCODE_COS:
    991    case TGSI_OPCODE_SIN:
    992       tc_add1(tc, opcode, tmp, srcx[0]);
    993       break;
    994    case TGSI_OPCODE_POW:
    995       tc_add2(tc, opcode, tmp, srcx[0], srcx[1]);
    996       break;
    997    default:
    998       assert(!"invalid soa_scalar_replicate() call");
    999       return;
   1000    }
   1001 
   1002    /* replicate the result */
   1003    for (i = 0; i < 4; i++)
   1004       tc_MOV(tc, dst0[i], tsrc_from(tmp));
   1005 }
   1006 
   1007 static void
   1008 soa_dot_product(struct toy_compiler *tc,
   1009                 const struct tgsi_full_instruction *tgsi_inst,
   1010                 struct toy_dst *dst_,
   1011                 struct toy_src *src_)
   1012 {
   1013    struct toy_dst dst0[4], tmp;
   1014    struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
   1015    int i;
   1016 
   1017    tdst_transpose(dst_[0], dst0);
   1018    for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
   1019       tsrc_transpose(src_[i], src[i]);
   1020 
   1021    tmp = tc_alloc_tmp(tc);
   1022 
   1023    switch (tgsi_inst->Instruction.Opcode) {
   1024    case TGSI_OPCODE_DP2:
   1025       tc_MUL(tc, tmp, src[0][1], src[1][1]);
   1026       tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
   1027       break;
   1028    case TGSI_OPCODE_DP2A:
   1029       tc_MAC(tc, tmp, src[0][1], src[1][1], src[2][0]);
   1030       tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
   1031       break;
   1032    case TGSI_OPCODE_DP3:
   1033       tc_MUL(tc, tmp, src[0][2], src[1][2]);
   1034       tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
   1035       tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
   1036       break;
   1037    case TGSI_OPCODE_DPH:
   1038       tc_MAC(tc, tmp, src[0][2], src[1][2], src[1][3]);
   1039       tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
   1040       tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
   1041       break;
   1042    case TGSI_OPCODE_DP4:
   1043       tc_MUL(tc, tmp, src[0][3], src[1][3]);
   1044       tc_MAC(tc, tmp, src[0][2], src[1][2], tsrc_from(tmp));
   1045       tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
   1046       tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
   1047       break;
   1048    default:
   1049       assert(!"invalid soa_dot_product() call");
   1050       return;
   1051    }
   1052 
   1053    for (i = 0; i < 4; i++)
   1054       tc_MOV(tc, dst0[i], tsrc_from(tmp));
   1055 }
   1056 
   1057 static void
   1058 soa_partial_derivative(struct toy_compiler *tc,
   1059                        const struct tgsi_full_instruction *tgsi_inst,
   1060                        struct toy_dst *dst_,
   1061                        struct toy_src *src_)
   1062 {
   1063    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_DDX)
   1064       tc_add1(tc, TOY_OPCODE_DDX, dst_[0], src_[0]);
   1065    else
   1066       tc_add1(tc, TOY_OPCODE_DDY, dst_[0], src_[0]);
   1067 }
   1068 
   1069 static void
   1070 soa_if(struct toy_compiler *tc,
   1071        const struct tgsi_full_instruction *tgsi_inst,
   1072        struct toy_dst *dst_,
   1073        struct toy_src *src_)
   1074 {
   1075    struct toy_src src0[4];
   1076 
   1077    assert(tsrc_is_swizzle1(src_[0]));
   1078    tsrc_transpose(src_[0], src0);
   1079 
   1080    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_IF)
   1081       tc_IF(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), GEN6_COND_NZ);
   1082    else
   1083       tc_IF(tc, tdst_null(), src0[0], tsrc_imm_d(0), GEN6_COND_NZ);
   1084 }
   1085 
   1086 static void
   1087 soa_LIT(struct toy_compiler *tc,
   1088         const struct tgsi_full_instruction *tgsi_inst,
   1089         struct toy_dst *dst_,
   1090         struct toy_src *src_)
   1091 {
   1092    struct toy_inst *inst;
   1093    struct toy_dst dst0[4];
   1094    struct toy_src src0[4];
   1095 
   1096    tdst_transpose(dst_[0], dst0);
   1097    tsrc_transpose(src_[0], src0);
   1098 
   1099    tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f));
   1100    tc_MOV(tc, dst0[1], src0[0]);
   1101    tc_POW(tc, dst0[2], src0[1], src0[3]);
   1102    tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
   1103 
   1104    /*
   1105     * POW is calculated first because math with pred_ctrl is broken here.
   1106     * But, why?
   1107     */
   1108    tc_CMP(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), GEN6_COND_L);
   1109    inst = tc_MOV(tc, dst0[1], tsrc_imm_f(0.0f));
   1110    inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
   1111    inst = tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f));
   1112    inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
   1113 }
   1114 
   1115 static void
   1116 soa_EXP(struct toy_compiler *tc,
   1117         const struct tgsi_full_instruction *tgsi_inst,
   1118         struct toy_dst *dst_,
   1119         struct toy_src *src_)
   1120 {
   1121    struct toy_dst dst0[4];
   1122    struct toy_src src0[4];
   1123 
   1124    assert(!"SoA EXP untested");
   1125 
   1126    tdst_transpose(dst_[0], dst0);
   1127    tsrc_transpose(src_[0], src0);
   1128 
   1129    if (!tdst_is_null(dst0[0])) {
   1130       struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));
   1131 
   1132       tc_RNDD(tc, tmp, src0[0]);
   1133 
   1134       /* construct the floating point number manually */
   1135       tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127));
   1136       tc_SHL(tc, tdst_d(dst0[0]), tsrc_from(tmp), tsrc_imm_d(23));
   1137    }
   1138 
   1139    tc_FRC(tc, dst0[1], src0[0]);
   1140    tc_EXP(tc, dst0[2], src0[0]);
   1141    tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
   1142 }
   1143 
   1144 static void
   1145 soa_LOG(struct toy_compiler *tc,
   1146         const struct tgsi_full_instruction *tgsi_inst,
   1147         struct toy_dst *dst_,
   1148         struct toy_src *src_)
   1149 {
   1150    struct toy_dst dst0[4];
   1151    struct toy_src src0[4];
   1152 
   1153    assert(!"SoA LOG untested");
   1154 
   1155    tdst_transpose(dst_[0], dst0);
   1156    tsrc_transpose(src_[0], src0);
   1157 
   1158    if (dst_[0].writemask & TOY_WRITEMASK_XY) {
   1159       struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));
   1160 
   1161       /* exponent */
   1162       tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0[0])), tsrc_imm_d(23));
   1163       tc_ADD(tc, dst0[0], tsrc_from(tmp), tsrc_imm_d(-127));
   1164 
   1165       /* mantissa  */
   1166       tc_AND(tc, tmp, tsrc_d(src0[0]), tsrc_imm_d((1 << 23) - 1));
   1167       tc_OR(tc, dst0[1], tsrc_from(tmp), tsrc_imm_d(127 << 23));
   1168    }
   1169 
   1170    tc_LOG(tc, dst0[2], src0[0]);
   1171    tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
   1172 }
   1173 
   1174 static void
   1175 soa_DST(struct toy_compiler *tc,
   1176         const struct tgsi_full_instruction *tgsi_inst,
   1177         struct toy_dst *dst_,
   1178         struct toy_src *src_)
   1179 {
   1180    struct toy_dst dst0[4];
   1181    struct toy_src src[2][4];
   1182 
   1183    tdst_transpose(dst_[0], dst0);
   1184    tsrc_transpose(src_[0], src[0]);
   1185    tsrc_transpose(src_[1], src[1]);
   1186 
   1187    tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f));
   1188    tc_MUL(tc, dst0[1], src[0][1], src[1][1]);
   1189    tc_MOV(tc, dst0[2], src[0][2]);
   1190    tc_MOV(tc, dst0[3], src[1][3]);
   1191 }
   1192 
   1193 static void
   1194 soa_XPD(struct toy_compiler *tc,
   1195         const struct tgsi_full_instruction *tgsi_inst,
   1196         struct toy_dst *dst_,
   1197         struct toy_src *src_)
   1198 {
   1199    struct toy_dst dst0[4];
   1200    struct toy_src src[2][4];
   1201 
   1202    tdst_transpose(dst_[0], dst0);
   1203    tsrc_transpose(src_[0], src[0]);
   1204    tsrc_transpose(src_[1], src[1]);
   1205 
   1206    /* dst.x = src0.y * src1.z - src1.y * src0.z */
   1207    tc_MUL(tc, dst0[0], src[0][2], src[1][1]);
   1208    tc_MAC(tc, dst0[0], src[0][1], src[1][2], tsrc_negate(tsrc_from(dst0[0])));
   1209 
   1210    /* dst.y = src0.z * src1.x - src1.z * src0.x */
   1211    tc_MUL(tc, dst0[1], src[0][0], src[1][2]);
   1212    tc_MAC(tc, dst0[1], src[0][2], src[1][0], tsrc_negate(tsrc_from(dst0[1])));
   1213 
   1214    /* dst.z = src0.x * src1.y - src1.x * src0.y */
   1215    tc_MUL(tc, dst0[2], src[0][1], src[1][0]);
   1216    tc_MAC(tc, dst0[2], src[0][0], src[1][1], tsrc_negate(tsrc_from(dst0[2])));
   1217 
   1218    tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
   1219 }
   1220 
   1221 static void
   1222 soa_PK2H(struct toy_compiler *tc,
   1223          const struct tgsi_full_instruction *tgsi_inst,
   1224          struct toy_dst *dst_,
   1225          struct toy_src *src_)
   1226 {
   1227    struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc));
   1228    struct toy_dst dst0[4];
   1229    struct toy_src src0[4];
   1230    int i;
   1231 
   1232    assert(!"SoA PK2H untested");
   1233 
   1234    tdst_transpose(dst_[0], dst0);
   1235    tsrc_transpose(src_[0], src0);
   1236 
   1237    tc_SHL(tc, tmp, src0[1], tsrc_imm_ud(16));
   1238    tc_OR(tc, tmp, src0[0], tsrc_from(tmp));
   1239 
   1240    for (i = 0; i < 4; i++)
   1241       tc_MOV(tc, dst0[i], tsrc_from(tmp));
   1242 }
   1243 
   1244 static void
   1245 soa_UP2H(struct toy_compiler *tc,
   1246          const struct tgsi_full_instruction *tgsi_inst,
   1247          struct toy_dst *dst_,
   1248          struct toy_src *src_)
   1249 {
   1250    struct toy_dst dst0[4];
   1251    struct toy_src src0[4];
   1252 
   1253    assert(!"SoA UP2H untested");
   1254 
   1255    tdst_transpose(dst_[0], dst0);
   1256    tsrc_transpose(src_[0], src0);
   1257 
   1258    tc_AND(tc, tdst_ud(dst0[0]), tsrc_ud(src0[0]), tsrc_imm_ud(0xffff));
   1259    tc_SHR(tc, tdst_ud(dst0[1]), tsrc_ud(src0[1]), tsrc_imm_ud(16));
   1260    tc_AND(tc, tdst_ud(dst0[2]), tsrc_ud(src0[2]), tsrc_imm_ud(0xffff));
   1261    tc_SHR(tc, tdst_ud(dst0[3]), tsrc_ud(src0[3]), tsrc_imm_ud(16));
   1262 
   1263 }
   1264 
   1265 static void
   1266 soa_SCS(struct toy_compiler *tc,
   1267         const struct tgsi_full_instruction *tgsi_inst,
   1268         struct toy_dst *dst_,
   1269         struct toy_src *src_)
   1270 {
   1271    struct toy_dst dst0[4];
   1272    struct toy_src src0[4];
   1273 
   1274    tdst_transpose(dst_[0], dst0);
   1275    tsrc_transpose(src_[0], src0);
   1276 
   1277    tc_add1(tc, TOY_OPCODE_COS, dst0[0], src0[0]);
   1278    tc_add1(tc, TOY_OPCODE_SIN, dst0[1], src0[0]);
   1279    tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f));
   1280    tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
   1281 }
   1282 
   1283 static void
   1284 soa_unsupported(struct toy_compiler *tc,
   1285                 const struct tgsi_full_instruction *tgsi_inst,
   1286                 struct toy_dst *dst_,
   1287                 struct toy_src *src_)
   1288 {
   1289    const struct tgsi_opcode_info *info =
   1290       tgsi_get_opcode_info(tgsi_inst->Instruction.Opcode);
   1291 
   1292    ilo_warn("unsupported TGSI opcode in SoA form: TGSI_OPCODE_%s\n",
   1293          info->mnemonic);
   1294 
   1295    tc_fail(tc, "unsupported TGSI instruction in SoA form");
   1296 }
   1297 
   1298 static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = {
   1299    [TGSI_OPCODE_ARL]          = soa_per_channel,
   1300    [TGSI_OPCODE_MOV]          = soa_per_channel,
   1301    [TGSI_OPCODE_LIT]          = soa_LIT,
   1302    [TGSI_OPCODE_RCP]          = soa_scalar_replicate,
   1303    [TGSI_OPCODE_RSQ]          = soa_scalar_replicate,
   1304    [TGSI_OPCODE_EXP]          = soa_EXP,
   1305    [TGSI_OPCODE_LOG]          = soa_LOG,
   1306    [TGSI_OPCODE_MUL]          = soa_per_channel,
   1307    [TGSI_OPCODE_ADD]          = soa_per_channel,
   1308    [TGSI_OPCODE_DP3]          = soa_dot_product,
   1309    [TGSI_OPCODE_DP4]          = soa_dot_product,
   1310    [TGSI_OPCODE_DST]          = soa_DST,
   1311    [TGSI_OPCODE_MIN]          = soa_per_channel,
   1312    [TGSI_OPCODE_MAX]          = soa_per_channel,
   1313    [TGSI_OPCODE_SLT]          = soa_per_channel,
   1314    [TGSI_OPCODE_SGE]          = soa_per_channel,
   1315    [TGSI_OPCODE_MAD]          = soa_per_channel,
   1316    [TGSI_OPCODE_LRP]          = soa_per_channel,
   1317    [TGSI_OPCODE_SQRT]         = soa_scalar_replicate,
   1318    [TGSI_OPCODE_DP2A]         = soa_dot_product,
   1319    [TGSI_OPCODE_FRC]          = soa_per_channel,
   1320    [TGSI_OPCODE_CLAMP]        = soa_per_channel,
   1321    [TGSI_OPCODE_FLR]          = soa_per_channel,
   1322    [TGSI_OPCODE_ROUND]        = soa_per_channel,
   1323    [TGSI_OPCODE_EX2]          = soa_scalar_replicate,
   1324    [TGSI_OPCODE_LG2]          = soa_scalar_replicate,
   1325    [TGSI_OPCODE_POW]          = soa_scalar_replicate,
   1326    [TGSI_OPCODE_XPD]          = soa_XPD,
   1327    [TGSI_OPCODE_DPH]          = soa_dot_product,
   1328    [TGSI_OPCODE_COS]          = soa_scalar_replicate,
   1329    [TGSI_OPCODE_DDX]          = soa_partial_derivative,
   1330    [TGSI_OPCODE_DDY]          = soa_partial_derivative,
   1331    [TGSI_OPCODE_KILL]         = soa_passthrough,
   1332    [TGSI_OPCODE_PK2H]         = soa_PK2H,
   1333    [TGSI_OPCODE_PK2US]        = soa_unsupported,
   1334    [TGSI_OPCODE_PK4B]         = soa_unsupported,
   1335    [TGSI_OPCODE_PK4UB]        = soa_unsupported,
   1336    [TGSI_OPCODE_SEQ]          = soa_per_channel,
   1337    [TGSI_OPCODE_SGT]          = soa_per_channel,
   1338    [TGSI_OPCODE_SIN]          = soa_scalar_replicate,
   1339    [TGSI_OPCODE_SLE]          = soa_per_channel,
   1340    [TGSI_OPCODE_SNE]          = soa_per_channel,
   1341    [TGSI_OPCODE_TEX]          = soa_passthrough,
   1342    [TGSI_OPCODE_TXD]          = soa_passthrough,
   1343    [TGSI_OPCODE_TXP]          = soa_passthrough,
   1344    [TGSI_OPCODE_UP2H]         = soa_UP2H,
   1345    [TGSI_OPCODE_UP2US]        = soa_unsupported,
   1346    [TGSI_OPCODE_UP4B]         = soa_unsupported,
   1347    [TGSI_OPCODE_UP4UB]        = soa_unsupported,
   1348    [TGSI_OPCODE_ARR]          = soa_per_channel,
   1349    [TGSI_OPCODE_CAL]          = soa_unsupported,
   1350    [TGSI_OPCODE_RET]          = soa_unsupported,
   1351    [TGSI_OPCODE_SSG]          = soa_per_channel,
   1352    [TGSI_OPCODE_CMP]          = soa_per_channel,
   1353    [TGSI_OPCODE_SCS]          = soa_SCS,
   1354    [TGSI_OPCODE_TXB]          = soa_passthrough,
   1355    [TGSI_OPCODE_DIV]          = soa_per_channel,
   1356    [TGSI_OPCODE_DP2]          = soa_dot_product,
   1357    [TGSI_OPCODE_TXL]          = soa_passthrough,
   1358    [TGSI_OPCODE_BRK]          = soa_passthrough,
   1359    [TGSI_OPCODE_IF]           = soa_if,
   1360    [TGSI_OPCODE_UIF]          = soa_if,
   1361    [TGSI_OPCODE_ELSE]         = soa_passthrough,
   1362    [TGSI_OPCODE_ENDIF]        = soa_passthrough,
   1363    [TGSI_OPCODE_PUSHA]        = soa_unsupported,
   1364    [TGSI_OPCODE_POPA]         = soa_unsupported,
   1365    [TGSI_OPCODE_CEIL]         = soa_per_channel,
   1366    [TGSI_OPCODE_I2F]          = soa_per_channel,
   1367    [TGSI_OPCODE_NOT]          = soa_per_channel,
   1368    [TGSI_OPCODE_TRUNC]        = soa_per_channel,
   1369    [TGSI_OPCODE_SHL]          = soa_per_channel,
   1370    [TGSI_OPCODE_AND]          = soa_per_channel,
   1371    [TGSI_OPCODE_OR]           = soa_per_channel,
   1372    [TGSI_OPCODE_MOD]          = soa_per_channel,
   1373    [TGSI_OPCODE_XOR]          = soa_per_channel,
   1374    [TGSI_OPCODE_SAD]          = soa_per_channel,
   1375    [TGSI_OPCODE_TXF]          = soa_passthrough,
   1376    [TGSI_OPCODE_TXQ]          = soa_passthrough,
   1377    [TGSI_OPCODE_CONT]         = soa_passthrough,
   1378    [TGSI_OPCODE_EMIT]         = soa_unsupported,
   1379    [TGSI_OPCODE_ENDPRIM]      = soa_unsupported,
   1380    [TGSI_OPCODE_BGNLOOP]      = soa_passthrough,
   1381    [TGSI_OPCODE_BGNSUB]       = soa_unsupported,
   1382    [TGSI_OPCODE_ENDLOOP]      = soa_passthrough,
   1383    [TGSI_OPCODE_ENDSUB]       = soa_unsupported,
   1384    [TGSI_OPCODE_TXQ_LZ]       = soa_passthrough,
   1385    [TGSI_OPCODE_NOP]          = soa_passthrough,
   1386    [TGSI_OPCODE_FSEQ]         = soa_per_channel,
   1387    [TGSI_OPCODE_FSGE]         = soa_per_channel,
   1388    [TGSI_OPCODE_FSLT]         = soa_per_channel,
   1389    [TGSI_OPCODE_FSNE]         = soa_per_channel,
   1390    [TGSI_OPCODE_CALLNZ]       = soa_unsupported,
   1391    [TGSI_OPCODE_BREAKC]       = soa_unsupported,
   1392    [TGSI_OPCODE_KILL_IF]          = soa_passthrough,
   1393    [TGSI_OPCODE_END]          = soa_passthrough,
   1394    [TGSI_OPCODE_F2I]          = soa_per_channel,
   1395    [TGSI_OPCODE_IDIV]         = soa_per_channel,
   1396    [TGSI_OPCODE_IMAX]         = soa_per_channel,
   1397    [TGSI_OPCODE_IMIN]         = soa_per_channel,
   1398    [TGSI_OPCODE_INEG]         = soa_per_channel,
   1399    [TGSI_OPCODE_ISGE]         = soa_per_channel,
   1400    [TGSI_OPCODE_ISHR]         = soa_per_channel,
   1401    [TGSI_OPCODE_ISLT]         = soa_per_channel,
   1402    [TGSI_OPCODE_F2U]          = soa_per_channel,
   1403    [TGSI_OPCODE_U2F]          = soa_per_channel,
   1404    [TGSI_OPCODE_UADD]         = soa_per_channel,
   1405    [TGSI_OPCODE_UDIV]         = soa_per_channel,
   1406    [TGSI_OPCODE_UMAD]         = soa_per_channel,
   1407    [TGSI_OPCODE_UMAX]         = soa_per_channel,
   1408    [TGSI_OPCODE_UMIN]         = soa_per_channel,
   1409    [TGSI_OPCODE_UMOD]         = soa_per_channel,
   1410    [TGSI_OPCODE_UMUL]         = soa_per_channel,
   1411    [TGSI_OPCODE_USEQ]         = soa_per_channel,
   1412    [TGSI_OPCODE_USGE]         = soa_per_channel,
   1413    [TGSI_OPCODE_USHR]         = soa_per_channel,
   1414    [TGSI_OPCODE_USLT]         = soa_per_channel,
   1415    [TGSI_OPCODE_USNE]         = soa_per_channel,
   1416    [TGSI_OPCODE_SWITCH]       = soa_unsupported,
   1417    [TGSI_OPCODE_CASE]         = soa_unsupported,
   1418    [TGSI_OPCODE_DEFAULT]      = soa_unsupported,
   1419    [TGSI_OPCODE_ENDSWITCH]    = soa_unsupported,
   1420    [TGSI_OPCODE_SAMPLE]       = soa_passthrough,
   1421    [TGSI_OPCODE_SAMPLE_I]     = soa_passthrough,
   1422    [TGSI_OPCODE_SAMPLE_I_MS]  = soa_passthrough,
   1423    [TGSI_OPCODE_SAMPLE_B]     = soa_passthrough,
   1424    [TGSI_OPCODE_SAMPLE_C]     = soa_passthrough,
   1425    [TGSI_OPCODE_SAMPLE_C_LZ]  = soa_passthrough,
   1426    [TGSI_OPCODE_SAMPLE_D]     = soa_passthrough,
   1427    [TGSI_OPCODE_SAMPLE_L]     = soa_passthrough,
   1428    [TGSI_OPCODE_GATHER4]      = soa_passthrough,
   1429    [TGSI_OPCODE_SVIEWINFO]    = soa_passthrough,
   1430    [TGSI_OPCODE_SAMPLE_POS]   = soa_passthrough,
   1431    [TGSI_OPCODE_SAMPLE_INFO]  = soa_passthrough,
   1432    [TGSI_OPCODE_UARL]         = soa_per_channel,
   1433    [TGSI_OPCODE_UCMP]         = soa_per_channel,
   1434    [TGSI_OPCODE_IABS]         = soa_per_channel,
   1435    [TGSI_OPCODE_ISSG]         = soa_per_channel,
   1436    [TGSI_OPCODE_LOAD]         = soa_unsupported,
   1437    [TGSI_OPCODE_STORE]        = soa_unsupported,
   1438    [TGSI_OPCODE_MFENCE]       = soa_unsupported,
   1439    [TGSI_OPCODE_LFENCE]       = soa_unsupported,
   1440    [TGSI_OPCODE_SFENCE]       = soa_unsupported,
   1441    [TGSI_OPCODE_BARRIER]      = soa_unsupported,
   1442    [TGSI_OPCODE_ATOMUADD]     = soa_unsupported,
   1443    [TGSI_OPCODE_ATOMXCHG]     = soa_unsupported,
   1444    [TGSI_OPCODE_ATOMCAS]      = soa_unsupported,
   1445    [TGSI_OPCODE_ATOMAND]      = soa_unsupported,
   1446    [TGSI_OPCODE_ATOMOR]       = soa_unsupported,
   1447    [TGSI_OPCODE_ATOMXOR]      = soa_unsupported,
   1448    [TGSI_OPCODE_ATOMUMIN]     = soa_unsupported,
   1449    [TGSI_OPCODE_ATOMUMAX]     = soa_unsupported,
   1450    [TGSI_OPCODE_ATOMIMIN]     = soa_unsupported,
   1451    [TGSI_OPCODE_ATOMIMAX]     = soa_unsupported,
   1452    [TGSI_OPCODE_TEX2]         = soa_passthrough,
   1453    [TGSI_OPCODE_TXB2]         = soa_passthrough,
   1454    [TGSI_OPCODE_TXL2]         = soa_passthrough,
   1455 };
   1456 
   1457 static bool
   1458 ra_dst_is_indirect(const struct tgsi_full_dst_register *d)
   1459 {
   1460    return (d->Register.Indirect ||
   1461          (d->Register.Dimension && d->Dimension.Indirect));
   1462 }
   1463 
   1464 static int
   1465 ra_dst_index(const struct tgsi_full_dst_register *d)
   1466 {
   1467    assert(!d->Register.Indirect);
   1468    return d->Register.Index;
   1469 }
   1470 
   1471 static int
   1472 ra_dst_dimension(const struct tgsi_full_dst_register *d)
   1473 {
   1474    if (d->Register.Dimension) {
   1475       assert(!d->Dimension.Indirect);
   1476       return d->Dimension.Index;
   1477    }
   1478    else {
   1479       return 0;
   1480    }
   1481 }
   1482 
   1483 static bool
   1484 ra_is_src_indirect(const struct tgsi_full_src_register *s)
   1485 {
   1486    return (s->Register.Indirect ||
   1487          (s->Register.Dimension && s->Dimension.Indirect));
   1488 }
   1489 
   1490 static int
   1491 ra_src_index(const struct tgsi_full_src_register *s)
   1492 {
   1493    assert(!s->Register.Indirect);
   1494    return s->Register.Index;
   1495 }
   1496 
   1497 static int
   1498 ra_src_dimension(const struct tgsi_full_src_register *s)
   1499 {
   1500    if (s->Register.Dimension) {
   1501       assert(!s->Dimension.Indirect);
   1502       return s->Dimension.Index;
   1503    }
   1504    else {
   1505       return 0;
   1506    }
   1507 }
   1508 
   1509 /**
   1510  * Infer the type of either the sources or the destination.
   1511  */
   1512 static enum toy_type
   1513 ra_infer_opcode_type(int tgsi_opcode, bool is_dst)
   1514 {
   1515    enum tgsi_opcode_type type;
   1516 
   1517    if (is_dst)
   1518       type = tgsi_opcode_infer_dst_type(tgsi_opcode);
   1519    else
   1520       type = tgsi_opcode_infer_src_type(tgsi_opcode);
   1521 
   1522    switch (type) {
   1523    case TGSI_TYPE_UNSIGNED:
   1524       return TOY_TYPE_UD;
   1525    case TGSI_TYPE_SIGNED:
   1526       return TOY_TYPE_D;
   1527    case TGSI_TYPE_FLOAT:
   1528       return TOY_TYPE_F;
   1529    case TGSI_TYPE_UNTYPED:
   1530    case TGSI_TYPE_VOID:
   1531    case TGSI_TYPE_DOUBLE:
   1532    default:
   1533       assert(!"unsupported TGSI type");
   1534       return TOY_TYPE_UD;
   1535    }
   1536 }
   1537 
   1538 /**
   1539  * Return the type of an operand of the specified instruction.
   1540  */
   1541 static enum toy_type
   1542 ra_get_type(struct toy_tgsi *tgsi, const struct tgsi_full_instruction *tgsi_inst,
   1543             int operand, bool is_dst)
   1544 {
   1545    enum toy_type type;
   1546    enum tgsi_file_type file;
   1547 
   1548    /* we need to look at both src and dst for MOV */
   1549    /* XXX it should not be this complex */
   1550    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
   1551       const enum tgsi_file_type dst_file = tgsi_inst->Dst[0].Register.File;
   1552       const enum tgsi_file_type src_file = tgsi_inst->Src[0].Register.File;
   1553 
   1554       if (dst_file == TGSI_FILE_ADDRESS || src_file == TGSI_FILE_ADDRESS) {
   1555          type = TOY_TYPE_D;
   1556       }
   1557       else if (src_file == TGSI_FILE_IMMEDIATE &&
   1558                !tgsi_inst->Src[0].Register.Indirect) {
   1559          const int src_idx = tgsi_inst->Src[0].Register.Index;
   1560          type = tgsi->imm_data.types[src_idx];
   1561       }
   1562       else {
   1563          /* this is the best we can do */
   1564          type = TOY_TYPE_F;
   1565       }
   1566 
   1567       return type;
   1568    }
   1569    else if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_UCMP) {
   1570       if (!is_dst && operand == 0)
   1571          type = TOY_TYPE_UD;
   1572       else
   1573          type = TOY_TYPE_F;
   1574 
   1575       return type;
   1576    }
   1577 
   1578    type = ra_infer_opcode_type(tgsi_inst->Instruction.Opcode, is_dst);
   1579 
   1580    /* fix the type */
   1581    file = (is_dst) ?
   1582       tgsi_inst->Dst[operand].Register.File :
   1583       tgsi_inst->Src[operand].Register.File;
   1584    switch (file) {
   1585    case TGSI_FILE_SAMPLER:
   1586    case TGSI_FILE_IMAGE:
   1587    case TGSI_FILE_SAMPLER_VIEW:
   1588       type = TOY_TYPE_D;
   1589       break;
   1590    case TGSI_FILE_ADDRESS:
   1591       assert(type == TOY_TYPE_D);
   1592       break;
   1593    default:
   1594       break;
   1595    }
   1596 
   1597    return type;
   1598 }
   1599 
   1600 /**
   1601  * Allocate a VRF register.
   1602  */
   1603 static int
   1604 ra_alloc_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file)
   1605 {
   1606    const int count = (tgsi->aos) ? 1 : 4;
   1607    return tc_alloc_vrf(tgsi->tc, count);
   1608 }
   1609 
   1610 /**
   1611  * Construct the key for VRF mapping look-up.
   1612  */
   1613 static void *
   1614 ra_get_map_key(enum tgsi_file_type file, unsigned dim, unsigned index)
   1615 {
   1616    intptr_t key;
   1617 
   1618    /* this is ugly... */
   1619    assert(file  < 1 << 4);
   1620    assert(dim   < 1 << 12);
   1621    assert(index < 1 << 16);
   1622    key = (file << 28) | (dim << 16) | index;
   1623 
   1624    return intptr_to_pointer(key);
   1625 }
   1626 
   1627 /**
   1628  * Map a TGSI register to a VRF register.
   1629  */
   1630 static int
   1631 ra_map_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file,
   1632            int dim, int index, bool *is_new)
   1633 {
   1634    void *key, *val;
   1635    intptr_t vrf;
   1636 
   1637    key = ra_get_map_key(file, dim, index);
   1638 
   1639    /*
   1640     * because we allocate vrf from 1 and on, val is never NULL as long as the
   1641     * key exists
   1642     */
   1643    val = util_hash_table_get(tgsi->reg_mapping, key);
   1644    if (val) {
   1645       vrf = pointer_to_intptr(val);
   1646 
   1647       if (is_new)
   1648          *is_new = false;
   1649    }
   1650    else {
   1651       vrf = (intptr_t) ra_alloc_reg(tgsi, file);
   1652 
   1653       /* add to the mapping */
   1654       val = intptr_to_pointer(vrf);
   1655       util_hash_table_set(tgsi->reg_mapping, key, val);
   1656 
   1657       if (is_new)
   1658          *is_new = true;
   1659    }
   1660 
   1661    return (int) vrf;
   1662 }
   1663 
   1664 /**
   1665  * Return true if the destination aliases any of the sources.
   1666  */
   1667 static bool
   1668 ra_dst_is_aliasing(const struct tgsi_full_instruction *tgsi_inst, int dst_index)
   1669 {
   1670    const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
   1671    int i;
   1672 
   1673    /* we need a scratch register for indirect dst anyway */
   1674    if (ra_dst_is_indirect(d))
   1675       return true;
   1676 
   1677    for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
   1678       const struct tgsi_full_src_register *s = &tgsi_inst->Src[i];
   1679 
   1680       if (s->Register.File != d->Register.File)
   1681          continue;
   1682 
   1683       /*
   1684        * we can go on to check dimension and index respectively, but
   1685        * keep it simple for now
   1686        */
   1687       if (ra_is_src_indirect(s))
   1688          return true;
   1689       if (ra_src_dimension(s) == ra_dst_dimension(d) &&
   1690           ra_src_index(s) == ra_dst_index(d))
   1691          return true;
   1692    }
   1693 
   1694    return false;
   1695 }
   1696 
   1697 /**
   1698  * Return the toy register for a TGSI destination operand.
   1699  */
   1700 static struct toy_dst
   1701 ra_get_dst(struct toy_tgsi *tgsi,
   1702            const struct tgsi_full_instruction *tgsi_inst, int dst_index,
   1703            bool *is_scratch)
   1704 {
   1705    const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
   1706    bool need_vrf = false;
   1707    struct toy_dst dst;
   1708 
   1709    switch (d->Register.File) {
   1710    case TGSI_FILE_NULL:
   1711       dst = tdst_null();
   1712       break;
   1713    case TGSI_FILE_OUTPUT:
   1714    case TGSI_FILE_TEMPORARY:
   1715    case TGSI_FILE_ADDRESS:
   1716    case TGSI_FILE_PREDICATE:
   1717       need_vrf = true;
   1718       break;
   1719    default:
   1720       assert(!"unhandled dst file");
   1721       dst = tdst_null();
   1722       break;
   1723    }
   1724 
   1725    if (need_vrf) {
   1726       /* XXX we do not always need a scratch given the conditions... */
   1727       const bool need_scratch =
   1728          (ra_dst_is_indirect(d) || ra_dst_is_aliasing(tgsi_inst, dst_index) ||
   1729           tgsi_inst->Instruction.Saturate);
   1730       const enum toy_type type = ra_get_type(tgsi, tgsi_inst, dst_index, true);
   1731       int vrf;
   1732 
   1733       if (need_scratch) {
   1734          vrf = ra_alloc_reg(tgsi, d->Register.File);
   1735       }
   1736       else {
   1737          vrf = ra_map_reg(tgsi, d->Register.File,
   1738                ra_dst_dimension(d), ra_dst_index(d), NULL);
   1739       }
   1740 
   1741       if (is_scratch)
   1742          *is_scratch = need_scratch;
   1743 
   1744       dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
   1745             false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
   1746    }
   1747 
   1748    return dst;
   1749 }
   1750 
   1751 static struct toy_src
   1752 ra_get_src_for_vrf(const struct tgsi_full_src_register *s,
   1753                    enum toy_type type, int vrf)
   1754 {
   1755    return tsrc_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
   1756                     false, 0,
   1757                     s->Register.SwizzleX, s->Register.SwizzleY,
   1758                     s->Register.SwizzleZ, s->Register.SwizzleW,
   1759                     s->Register.Absolute, s->Register.Negate,
   1760                     vrf * TOY_REG_WIDTH);
   1761 }
   1762 
   1763 static int
   1764 init_tgsi_reg(struct toy_tgsi *tgsi, struct toy_inst *inst,
   1765               enum tgsi_file_type file, int index,
   1766               const struct tgsi_ind_register *indirect,
   1767               const struct tgsi_dimension *dimension,
   1768               const struct tgsi_ind_register *dim_indirect)
   1769 {
   1770    struct toy_src src;
   1771    int num_src = 0;
   1772 
   1773    /* src[0]: TGSI file */
   1774    inst->src[num_src++] = tsrc_imm_d(file);
   1775 
   1776    /* src[1]: TGSI dimension */
   1777    inst->src[num_src++] = tsrc_imm_d((dimension) ? dimension->Index : 0);
   1778 
   1779    /* src[2]: TGSI dimension indirection */
   1780    if (dim_indirect) {
   1781       const int vrf = ra_map_reg(tgsi, dim_indirect->File, 0,
   1782             dim_indirect->Index, NULL);
   1783 
   1784       src = tsrc(TOY_FILE_VRF, vrf, 0);
   1785       src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle);
   1786    }
   1787    else {
   1788       src = tsrc_imm_d(0);
   1789    }
   1790 
   1791    inst->src[num_src++] = src;
   1792 
   1793    /* src[3]: TGSI index */
   1794    inst->src[num_src++] = tsrc_imm_d(index);
   1795 
   1796    /* src[4]: TGSI index indirection */
   1797    if (indirect) {
   1798       const int vrf = ra_map_reg(tgsi, indirect->File, 0,
   1799             indirect->Index, NULL);
   1800 
   1801       src = tsrc(TOY_FILE_VRF, vrf, 0);
   1802       src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle);
   1803    }
   1804    else {
   1805       src = tsrc_imm_d(0);
   1806    }
   1807 
   1808    inst->src[num_src++] = src;
   1809 
   1810    return num_src;
   1811 }
   1812 
   1813 static struct toy_src
   1814 ra_get_src_indirect(struct toy_tgsi *tgsi,
   1815                     const struct tgsi_full_instruction *tgsi_inst,
   1816                     int src_index)
   1817 {
   1818    const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
   1819    bool need_vrf = false, is_resource = false;
   1820    struct toy_src src;
   1821 
   1822    switch (s->Register.File) {
   1823    case TGSI_FILE_NULL:
   1824       src = tsrc_null();
   1825       break;
   1826    case TGSI_FILE_SAMPLER:
   1827    case TGSI_FILE_IMAGE:
   1828    case TGSI_FILE_SAMPLER_VIEW:
   1829       is_resource = true;
   1830       /* fall through */
   1831    case TGSI_FILE_CONSTANT:
   1832    case TGSI_FILE_INPUT:
   1833    case TGSI_FILE_SYSTEM_VALUE:
   1834    case TGSI_FILE_TEMPORARY:
   1835    case TGSI_FILE_ADDRESS:
   1836    case TGSI_FILE_IMMEDIATE:
   1837    case TGSI_FILE_PREDICATE:
   1838       need_vrf = true;
   1839       break;
   1840    default:
   1841       assert(!"unhandled src file");
   1842       src = tsrc_null();
   1843       break;
   1844    }
   1845 
   1846    if (need_vrf) {
   1847       const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
   1848       int vrf;
   1849 
   1850       if (is_resource) {
   1851          assert(!s->Register.Dimension);
   1852          assert(s->Register.Indirect);
   1853 
   1854          vrf = ra_map_reg(tgsi, s->Indirect.File, 0, s->Indirect.Index, NULL);
   1855       }
   1856       else {
   1857          vrf = ra_alloc_reg(tgsi, s->Register.File);
   1858       }
   1859 
   1860       src = ra_get_src_for_vrf(s, type, vrf);
   1861 
   1862       /* emit indirect fetch */
   1863       if (!is_resource) {
   1864          struct toy_inst *inst;
   1865 
   1866          inst = tc_add(tgsi->tc);
   1867          inst->opcode = TOY_OPCODE_TGSI_INDIRECT_FETCH;
   1868          inst->dst = tdst_from(src);
   1869          inst->dst.writemask = TOY_WRITEMASK_XYZW;
   1870 
   1871          init_tgsi_reg(tgsi, inst, s->Register.File, s->Register.Index,
   1872                (s->Register.Indirect) ? &s->Indirect : NULL,
   1873                (s->Register.Dimension) ? &s->Dimension : NULL,
   1874                (s->Dimension.Indirect) ? &s->DimIndirect : NULL);
   1875       }
   1876    }
   1877 
   1878    return src;
   1879 }
   1880 
   1881 /**
   1882  * Return the toy register for a TGSI source operand.
   1883  */
   1884 static struct toy_src
   1885 ra_get_src(struct toy_tgsi *tgsi,
   1886            const struct tgsi_full_instruction *tgsi_inst,
   1887            int src_index)
   1888 {
   1889    const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
   1890    bool need_vrf = false;
   1891    struct toy_src src;
   1892 
   1893    if (ra_is_src_indirect(s))
   1894       return ra_get_src_indirect(tgsi, tgsi_inst, src_index);
   1895 
   1896    switch (s->Register.File) {
   1897    case TGSI_FILE_NULL:
   1898       src = tsrc_null();
   1899       break;
   1900    case TGSI_FILE_CONSTANT:
   1901    case TGSI_FILE_INPUT:
   1902    case TGSI_FILE_SYSTEM_VALUE:
   1903       need_vrf = true;
   1904       break;
   1905    case TGSI_FILE_TEMPORARY:
   1906    case TGSI_FILE_ADDRESS:
   1907    case TGSI_FILE_PREDICATE:
   1908       need_vrf = true;
   1909       break;
   1910    case TGSI_FILE_SAMPLER:
   1911    case TGSI_FILE_IMAGE:
   1912    case TGSI_FILE_SAMPLER_VIEW:
   1913       assert(!s->Register.Dimension);
   1914       src = tsrc_imm_d(s->Register.Index);
   1915       break;
   1916    case TGSI_FILE_IMMEDIATE:
   1917       {
   1918          const uint32_t *imm;
   1919          enum toy_type imm_type;
   1920          bool is_scalar;
   1921 
   1922          imm = toy_tgsi_get_imm(tgsi, s->Register.Index, &imm_type);
   1923 
   1924          is_scalar =
   1925             (imm[s->Register.SwizzleX] == imm[s->Register.SwizzleY] &&
   1926              imm[s->Register.SwizzleX] == imm[s->Register.SwizzleZ] &&
   1927              imm[s->Register.SwizzleX] == imm[s->Register.SwizzleW]);
   1928 
   1929          if (is_scalar) {
   1930             const enum toy_type type =
   1931                ra_get_type(tgsi, tgsi_inst, src_index, false);
   1932 
   1933             /* ignore imm_type */
   1934             src = tsrc_imm_ud(imm[s->Register.SwizzleX]);
   1935             src.type = type;
   1936             src.absolute = s->Register.Absolute;
   1937             src.negate = s->Register.Negate;
   1938          }
   1939          else {
   1940             need_vrf = true;
   1941          }
   1942       }
   1943       break;
   1944    default:
   1945       assert(!"unhandled src file");
   1946       src = tsrc_null();
   1947       break;
   1948    }
   1949 
   1950    if (need_vrf) {
   1951       const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
   1952       bool is_new;
   1953       int vrf;
   1954 
   1955       vrf = ra_map_reg(tgsi, s->Register.File,
   1956             ra_src_dimension(s), ra_src_index(s), &is_new);
   1957 
   1958       src = ra_get_src_for_vrf(s, type, vrf);
   1959 
   1960       if (is_new) {
   1961          switch (s->Register.File) {
   1962          case TGSI_FILE_TEMPORARY:
   1963          case TGSI_FILE_ADDRESS:
   1964          case TGSI_FILE_PREDICATE:
   1965             {
   1966                struct toy_dst dst = tdst_from(src);
   1967                dst.writemask = TOY_WRITEMASK_XYZW;
   1968 
   1969                /* always initialize registers before use */
   1970                if (tgsi->aos) {
   1971                   tc_MOV(tgsi->tc, dst, tsrc_type(tsrc_imm_d(0), type));
   1972                }
   1973                else {
   1974                   struct toy_dst tdst[4];
   1975                   int i;
   1976 
   1977                   tdst_transpose(dst, tdst);
   1978 
   1979                   for (i = 0; i < 4; i++) {
   1980                      tc_MOV(tgsi->tc, tdst[i],
   1981                            tsrc_type(tsrc_imm_d(0), type));
   1982                   }
   1983                }
   1984             }
   1985             break;
   1986          default:
   1987             break;
   1988          }
   1989       }
   1990 
   1991    }
   1992 
   1993    return src;
   1994 }
   1995 
   1996 static void
   1997 parse_instruction(struct toy_tgsi *tgsi,
   1998                   const struct tgsi_full_instruction *tgsi_inst)
   1999 {
   2000    struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS];
   2001    struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS];
   2002    bool dst_is_scratch[TGSI_FULL_MAX_DST_REGISTERS];
   2003    toy_tgsi_translate translate;
   2004    int i;
   2005 
   2006    /* convert TGSI registers to toy registers */
   2007    for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
   2008       src[i] = ra_get_src(tgsi, tgsi_inst, i);
   2009    for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
   2010       dst[i] = ra_get_dst(tgsi, tgsi_inst, i, &dst_is_scratch[i]);
   2011 
   2012    /* translate the instruction */
   2013    translate = tgsi->translate_table[tgsi_inst->Instruction.Opcode];
   2014    if (!translate) {
   2015       if (tgsi->translate_table == soa_translate_table)
   2016          soa_unsupported(tgsi->tc, tgsi_inst, dst, src);
   2017       else
   2018          aos_unsupported(tgsi->tc, tgsi_inst, dst, src);
   2019    }
   2020    translate(tgsi->tc, tgsi_inst, dst, src);
   2021 
   2022    /* write the result to the real destinations if needed */
   2023    for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
   2024       const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
   2025 
   2026       if (!dst_is_scratch[i])
   2027          continue;
   2028 
   2029       tgsi->tc->templ.saturate = tgsi_inst->Instruction.Saturate;
   2030 
   2031       /* emit indirect store */
   2032       if (ra_dst_is_indirect(d)) {
   2033          struct toy_inst *inst;
   2034 
   2035          inst = tc_add(tgsi->tc);
   2036          inst->opcode = TOY_OPCODE_TGSI_INDIRECT_STORE;
   2037          inst->dst = dst[i];
   2038 
   2039          init_tgsi_reg(tgsi, inst, d->Register.File, d->Register.Index,
   2040                (d->Register.Indirect) ? &d->Indirect : NULL,
   2041                (d->Register.Dimension) ? &d->Dimension : NULL,
   2042                (d->Dimension.Indirect) ? &d->DimIndirect : NULL);
   2043       }
   2044       else {
   2045          const enum toy_type type = ra_get_type(tgsi, tgsi_inst, i, true);
   2046          struct toy_dst real_dst;
   2047          int vrf;
   2048 
   2049          vrf = ra_map_reg(tgsi, d->Register.File,
   2050                ra_dst_dimension(d), ra_dst_index(d), NULL);
   2051          real_dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
   2052                false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
   2053 
   2054          if (tgsi->aos) {
   2055             tc_MOV(tgsi->tc, real_dst, tsrc_from(dst[i]));
   2056          }
   2057          else {
   2058             struct toy_dst tdst[4];
   2059             struct toy_src tsrc[4];
   2060             int j;
   2061 
   2062             tdst_transpose(real_dst, tdst);
   2063             tsrc_transpose(tsrc_from(dst[i]), tsrc);
   2064 
   2065             for (j = 0; j < 4; j++)
   2066                tc_MOV(tgsi->tc, tdst[j], tsrc[j]);
   2067          }
   2068       }
   2069 
   2070       tgsi->tc->templ.saturate = false;
   2071    }
   2072 
   2073    switch (tgsi_inst->Instruction.Opcode) {
   2074    case TGSI_OPCODE_KILL_IF:
   2075    case TGSI_OPCODE_KILL:
   2076       tgsi->uses_kill = true;
   2077       break;
   2078    }
   2079 
   2080    for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
   2081       const struct tgsi_full_src_register *s = &tgsi_inst->Src[i];
   2082       if (s->Register.File == TGSI_FILE_CONSTANT && s->Register.Indirect)
   2083          tgsi->const_indirect = true;
   2084    }
   2085 
   2086    /* remember channels written */
   2087    for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
   2088       const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
   2089 
   2090       if (d->Register.File != TGSI_FILE_OUTPUT)
   2091          continue;
   2092       for (i = 0; i < tgsi->num_outputs; i++) {
   2093          if (tgsi->outputs[i].index == d->Register.Index) {
   2094             tgsi->outputs[i].undefined_mask &= ~d->Register.WriteMask;
   2095             break;
   2096          }
   2097       }
   2098    }
   2099 }
   2100 
   2101 static void
   2102 decl_add_in(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
   2103 {
   2104    static const struct tgsi_declaration_interp default_interp = {
   2105       TGSI_INTERPOLATE_PERSPECTIVE, false, 0,
   2106    };
   2107    const struct tgsi_declaration_interp *interp =
   2108       (decl->Declaration.Interpolate) ? &decl->Interp: &default_interp;
   2109    int index;
   2110 
   2111    if (decl->Range.Last >= ARRAY_SIZE(tgsi->inputs)) {
   2112       assert(!"invalid IN");
   2113       return;
   2114    }
   2115 
   2116    for (index = decl->Range.First; index <= decl->Range.Last; index++) {
   2117       const int slot = tgsi->num_inputs++;
   2118 
   2119       tgsi->inputs[slot].index = index;
   2120       tgsi->inputs[slot].usage_mask = decl->Declaration.UsageMask;
   2121       if (decl->Declaration.Semantic) {
   2122          tgsi->inputs[slot].semantic_name = decl->Semantic.Name;
   2123          tgsi->inputs[slot].semantic_index = decl->Semantic.Index;
   2124       }
   2125       else {
   2126          tgsi->inputs[slot].semantic_name = TGSI_SEMANTIC_GENERIC;
   2127          tgsi->inputs[slot].semantic_index = index;
   2128       }
   2129       tgsi->inputs[slot].interp = interp->Interpolate;
   2130       tgsi->inputs[slot].centroid = interp->Location == TGSI_INTERPOLATE_LOC_CENTROID;
   2131    }
   2132 }
   2133 
   2134 static void
   2135 decl_add_out(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
   2136 {
   2137    int index;
   2138 
   2139    if (decl->Range.Last >= ARRAY_SIZE(tgsi->outputs)) {
   2140       assert(!"invalid OUT");
   2141       return;
   2142    }
   2143 
   2144    assert(decl->Declaration.Semantic);
   2145 
   2146    for (index = decl->Range.First; index <= decl->Range.Last; index++) {
   2147       const int slot = tgsi->num_outputs++;
   2148 
   2149       tgsi->outputs[slot].index = index;
   2150       tgsi->outputs[slot].undefined_mask = TOY_WRITEMASK_XYZW;
   2151       tgsi->outputs[slot].usage_mask = decl->Declaration.UsageMask;
   2152       tgsi->outputs[slot].semantic_name = decl->Semantic.Name;
   2153       tgsi->outputs[slot].semantic_index = decl->Semantic.Index;
   2154    }
   2155 }
   2156 
   2157 static void
   2158 decl_add_sv(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
   2159 {
   2160    int index;
   2161 
   2162    if (decl->Range.Last >= ARRAY_SIZE(tgsi->system_values)) {
   2163       assert(!"invalid SV");
   2164       return;
   2165    }
   2166 
   2167    for (index = decl->Range.First; index <= decl->Range.Last; index++) {
   2168       const int slot = tgsi->num_system_values++;
   2169 
   2170       tgsi->system_values[slot].index = index;
   2171       if (decl->Declaration.Semantic) {
   2172          tgsi->system_values[slot].semantic_name = decl->Semantic.Name;
   2173          tgsi->system_values[slot].semantic_index = decl->Semantic.Index;
   2174       }
   2175       else {
   2176          tgsi->system_values[slot].semantic_name = TGSI_SEMANTIC_GENERIC;
   2177          tgsi->system_values[slot].semantic_index = index;
   2178       }
   2179    }
   2180 }
   2181 
   2182 /**
   2183  * Emit an instruction to fetch the value of a TGSI register.
   2184  */
   2185 static void
   2186 fetch_source(struct toy_tgsi *tgsi, enum tgsi_file_type file, int dim, int idx)
   2187 {
   2188    struct toy_dst dst;
   2189    int vrf;
   2190    enum toy_opcode opcode;
   2191    enum toy_type type = TOY_TYPE_F;
   2192 
   2193    switch (file) {
   2194    case TGSI_FILE_INPUT:
   2195       opcode = TOY_OPCODE_TGSI_IN;
   2196       break;
   2197    case TGSI_FILE_CONSTANT:
   2198       opcode = TOY_OPCODE_TGSI_CONST;
   2199       break;
   2200    case TGSI_FILE_SYSTEM_VALUE:
   2201       opcode = TOY_OPCODE_TGSI_SV;
   2202       break;
   2203    case TGSI_FILE_IMMEDIATE:
   2204       opcode = TOY_OPCODE_TGSI_IMM;
   2205       toy_tgsi_get_imm(tgsi, idx, &type);
   2206       break;
   2207    default:
   2208       /* no need to fetch */
   2209       return;
   2210       break;
   2211    }
   2212 
   2213    vrf = ra_map_reg(tgsi, file, dim, idx, NULL);
   2214    dst = tdst(TOY_FILE_VRF, vrf, 0);
   2215    dst = tdst_type(dst, type);
   2216 
   2217    tc_add2(tgsi->tc, opcode, dst, tsrc_imm_d(dim), tsrc_imm_d(idx));
   2218 }
   2219 
   2220 static void
   2221 parse_declaration(struct toy_tgsi *tgsi,
   2222                   const struct tgsi_full_declaration *decl)
   2223 {
   2224    int i;
   2225 
   2226    switch (decl->Declaration.File) {
   2227    case TGSI_FILE_INPUT:
   2228       decl_add_in(tgsi, decl);
   2229       break;
   2230    case TGSI_FILE_OUTPUT:
   2231       decl_add_out(tgsi, decl);
   2232       break;
   2233    case TGSI_FILE_SYSTEM_VALUE:
   2234       decl_add_sv(tgsi, decl);
   2235       break;
   2236    case TGSI_FILE_IMMEDIATE:
   2237       /* immediates should be declared with TGSI_TOKEN_TYPE_IMMEDIATE */
   2238       assert(!"unexpected immediate declaration");
   2239       break;
   2240    case TGSI_FILE_CONSTANT:
   2241       if (tgsi->const_count <= decl->Range.Last)
   2242          tgsi->const_count = decl->Range.Last + 1;
   2243       break;
   2244    case TGSI_FILE_NULL:
   2245    case TGSI_FILE_TEMPORARY:
   2246    case TGSI_FILE_SAMPLER:
   2247    case TGSI_FILE_PREDICATE:
   2248    case TGSI_FILE_ADDRESS:
   2249    case TGSI_FILE_IMAGE:
   2250    case TGSI_FILE_SAMPLER_VIEW:
   2251       /* nothing to do */
   2252       break;
   2253    default:
   2254       assert(!"unhandled TGSI file");
   2255       break;
   2256    }
   2257 
   2258    /* fetch the registers now */
   2259    for (i = decl->Range.First; i <= decl->Range.Last; i++) {
   2260       const int dim = (decl->Declaration.Dimension) ? decl->Dim.Index2D : 0;
   2261       fetch_source(tgsi, decl->Declaration.File, dim, i);
   2262    }
   2263 }
   2264 
   2265 static int
   2266 add_imm(struct toy_tgsi *tgsi, enum toy_type type, const uint32_t *buf)
   2267 {
   2268    /* reallocate the buffer if necessary */
   2269    if (tgsi->imm_data.cur >= tgsi->imm_data.size) {
   2270       const int cur_size = tgsi->imm_data.size;
   2271       int new_size;
   2272       enum toy_type *new_types;
   2273       uint32_t (*new_buf)[4];
   2274 
   2275       new_size = (cur_size) ? cur_size << 1 : 16;
   2276       while (new_size <= tgsi->imm_data.cur)
   2277          new_size <<= 1;
   2278 
   2279       new_buf = REALLOC(tgsi->imm_data.buf,
   2280             cur_size * sizeof(new_buf[0]),
   2281             new_size * sizeof(new_buf[0]));
   2282       new_types = REALLOC(tgsi->imm_data.types,
   2283             cur_size * sizeof(new_types[0]),
   2284             new_size * sizeof(new_types[0]));
   2285       if (!new_buf || !new_types) {
   2286          FREE(new_buf);
   2287          FREE(new_types);
   2288          return -1;
   2289       }
   2290 
   2291       tgsi->imm_data.buf = new_buf;
   2292       tgsi->imm_data.types = new_types;
   2293       tgsi->imm_data.size = new_size;
   2294    }
   2295 
   2296    tgsi->imm_data.types[tgsi->imm_data.cur] = type;
   2297    memcpy(&tgsi->imm_data.buf[tgsi->imm_data.cur],
   2298          buf, sizeof(tgsi->imm_data.buf[0]));
   2299 
   2300    return tgsi->imm_data.cur++;
   2301 }
   2302 
   2303 static void
   2304 parse_immediate(struct toy_tgsi *tgsi, const struct tgsi_full_immediate *imm)
   2305 {
   2306    enum toy_type type;
   2307    uint32_t imm_buf[4];
   2308    int idx;
   2309 
   2310    switch (imm->Immediate.DataType) {
   2311    case TGSI_IMM_FLOAT32:
   2312       type = TOY_TYPE_F;
   2313       imm_buf[0] = fui(imm->u[0].Float);
   2314       imm_buf[1] = fui(imm->u[1].Float);
   2315       imm_buf[2] = fui(imm->u[2].Float);
   2316       imm_buf[3] = fui(imm->u[3].Float);
   2317       break;
   2318    case TGSI_IMM_INT32:
   2319       type = TOY_TYPE_D;
   2320       imm_buf[0] = (uint32_t) imm->u[0].Int;
   2321       imm_buf[1] = (uint32_t) imm->u[1].Int;
   2322       imm_buf[2] = (uint32_t) imm->u[2].Int;
   2323       imm_buf[3] = (uint32_t) imm->u[3].Int;
   2324       break;
   2325    case TGSI_IMM_UINT32:
   2326       type = TOY_TYPE_UD;
   2327       imm_buf[0] = imm->u[0].Uint;
   2328       imm_buf[1] = imm->u[1].Uint;
   2329       imm_buf[2] = imm->u[2].Uint;
   2330       imm_buf[3] = imm->u[3].Uint;
   2331       break;
   2332    default:
   2333       assert(!"unhandled TGSI imm type");
   2334       type = TOY_TYPE_F;
   2335       memset(imm_buf, 0, sizeof(imm_buf));
   2336       break;
   2337    }
   2338 
   2339    idx = add_imm(tgsi, type, imm_buf);
   2340    if (idx >= 0)
   2341       fetch_source(tgsi, TGSI_FILE_IMMEDIATE, 0, idx);
   2342    else
   2343       tc_fail(tgsi->tc, "failed to add TGSI imm");
   2344 }
   2345 
   2346 static void
   2347 parse_property(struct toy_tgsi *tgsi, const struct tgsi_full_property *prop)
   2348 {
   2349    switch (prop->Property.PropertyName) {
   2350    case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
   2351       tgsi->props.vs_prohibit_ucps = prop->u[0].Data;
   2352       break;
   2353    case TGSI_PROPERTY_FS_COORD_ORIGIN:
   2354       tgsi->props.fs_coord_origin = prop->u[0].Data;
   2355       break;
   2356    case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
   2357       tgsi->props.fs_coord_pixel_center = prop->u[0].Data;
   2358       break;
   2359    case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
   2360       tgsi->props.fs_color0_writes_all_cbufs = prop->u[0].Data;
   2361       break;
   2362    case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
   2363       tgsi->props.fs_depth_layout = prop->u[0].Data;
   2364       break;
   2365    case TGSI_PROPERTY_GS_INPUT_PRIM:
   2366       tgsi->props.gs_input_prim = prop->u[0].Data;
   2367       break;
   2368    case TGSI_PROPERTY_GS_OUTPUT_PRIM:
   2369       tgsi->props.gs_output_prim = prop->u[0].Data;
   2370       break;
   2371    case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
   2372       tgsi->props.gs_max_output_vertices = prop->u[0].Data;
   2373       break;
   2374    default:
   2375       assert(!"unhandled TGSI property");
   2376       break;
   2377    }
   2378 }
   2379 
   2380 static void
   2381 parse_token(struct toy_tgsi *tgsi, const union tgsi_full_token *token)
   2382 {
   2383    switch (token->Token.Type) {
   2384    case TGSI_TOKEN_TYPE_DECLARATION:
   2385       parse_declaration(tgsi, &token->FullDeclaration);
   2386       break;
   2387    case TGSI_TOKEN_TYPE_IMMEDIATE:
   2388       parse_immediate(tgsi, &token->FullImmediate);
   2389       break;
   2390    case TGSI_TOKEN_TYPE_INSTRUCTION:
   2391       parse_instruction(tgsi, &token->FullInstruction);
   2392       break;
   2393    case TGSI_TOKEN_TYPE_PROPERTY:
   2394       parse_property(tgsi, &token->FullProperty);
   2395       break;
   2396    default:
   2397       assert(!"unhandled TGSI token type");
   2398       break;
   2399    }
   2400 }
   2401 
   2402 static enum pipe_error
   2403 dump_reg_mapping(void *key, void *val, void *data)
   2404 {
   2405    int tgsi_file, tgsi_dim, tgsi_index;
   2406    uint32_t sig, vrf;
   2407 
   2408    sig = (uint32_t) pointer_to_intptr(key);
   2409    vrf = (uint32_t) pointer_to_intptr(val);
   2410 
   2411    /* see ra_get_map_key() */
   2412    tgsi_file =  (sig >> 28) & 0xf;
   2413    tgsi_dim =   (sig >> 16) & 0xfff;
   2414    tgsi_index = (sig >> 0)  & 0xffff;
   2415 
   2416    if (tgsi_dim) {
   2417       ilo_printf("  v%d:\t%s[%d][%d]\n", vrf,
   2418                  tgsi_file_name(tgsi_file), tgsi_dim, tgsi_index);
   2419    }
   2420    else {
   2421       ilo_printf("  v%d:\t%s[%d]\n", vrf,
   2422                  tgsi_file_name(tgsi_file), tgsi_index);
   2423    }
   2424 
   2425    return PIPE_OK;
   2426 }
   2427 
   2428 /**
   2429  * Dump the TGSI translator, currently only the register mapping.
   2430  */
   2431 void
   2432 toy_tgsi_dump(const struct toy_tgsi *tgsi)
   2433 {
   2434    util_hash_table_foreach(tgsi->reg_mapping, dump_reg_mapping, NULL);
   2435 }
   2436 
   2437 /**
   2438  * Clean up the TGSI translator.
   2439  */
   2440 void
   2441 toy_tgsi_cleanup(struct toy_tgsi *tgsi)
   2442 {
   2443    FREE(tgsi->imm_data.buf);
   2444    FREE(tgsi->imm_data.types);
   2445 
   2446    util_hash_table_destroy(tgsi->reg_mapping);
   2447 }
   2448 
   2449 static unsigned
   2450 reg_mapping_hash(void *key)
   2451 {
   2452    return (unsigned) pointer_to_intptr(key);
   2453 }
   2454 
   2455 static int
   2456 reg_mapping_compare(void *key1, void *key2)
   2457 {
   2458    return (key1 != key2);
   2459 }
   2460 
   2461 /**
   2462  * Initialize the TGSI translator.
   2463  */
   2464 static bool
   2465 init_tgsi(struct toy_tgsi *tgsi, struct toy_compiler *tc, bool aos)
   2466 {
   2467    memset(tgsi, 0, sizeof(*tgsi));
   2468 
   2469    tgsi->tc = tc;
   2470    tgsi->aos = aos;
   2471    tgsi->translate_table = (aos) ? aos_translate_table : soa_translate_table;
   2472 
   2473    /* create a mapping of TGSI registers to VRF reigsters */
   2474    tgsi->reg_mapping =
   2475       util_hash_table_create(reg_mapping_hash, reg_mapping_compare);
   2476 
   2477    return (tgsi->reg_mapping != NULL);
   2478 }
   2479 
   2480 /**
   2481  * Translate TGSI tokens into toy instructions.
   2482  */
   2483 void
   2484 toy_compiler_translate_tgsi(struct toy_compiler *tc,
   2485                             const struct tgsi_token *tokens, bool aos,
   2486                             struct toy_tgsi *tgsi)
   2487 {
   2488    struct tgsi_parse_context parse;
   2489 
   2490    if (!init_tgsi(tgsi, tc, aos)) {
   2491       tc_fail(tc, "failed to initialize TGSI translator");
   2492       return;
   2493    }
   2494 
   2495    tgsi_parse_init(&parse, tokens);
   2496    while (!tgsi_parse_end_of_tokens(&parse)) {
   2497       tgsi_parse_token(&parse);
   2498       parse_token(tgsi, &parse.FullToken);
   2499    }
   2500    tgsi_parse_free(&parse);
   2501 }
   2502 
   2503 /**
   2504  * Map the TGSI register to VRF register.
   2505  */
   2506 int
   2507 toy_tgsi_get_vrf(const struct toy_tgsi *tgsi,
   2508                  enum tgsi_file_type file, int dimension, int index)
   2509 {
   2510    void *key, *val;
   2511 
   2512    key = ra_get_map_key(file, dimension, index);
   2513 
   2514    val = util_hash_table_get(tgsi->reg_mapping, key);
   2515 
   2516    return (val) ? pointer_to_intptr(val) : -1;
   2517 }
   2518