Home | History | Annotate | Download | only in state_tracker
      1 /*
      2  * Copyright (C) 2016 Miklós Máté
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20  * OTHER DEALINGS IN THE SOFTWARE.
     21  */
     22 
     23 #include "main/mtypes.h"
     24 #include "main/atifragshader.h"
     25 #include "main/errors.h"
     26 #include "program/prog_parameter.h"
     27 
     28 #include "tgsi/tgsi_ureg.h"
     29 #include "tgsi/tgsi_scan.h"
     30 #include "tgsi/tgsi_transform.h"
     31 
     32 #include "st_program.h"
     33 #include "st_atifs_to_tgsi.h"
     34 
     35 /**
     36  * Intermediate state used during shader translation.
     37  */
     38 struct st_translate {
     39    struct ureg_program *ureg;          /* TGSI program being built; every instruction is emitted through it */
     40    struct ati_fragment_shader *atifs;  /* shader being translated */
     41 
     42    struct ureg_dst temps[MAX_PROGRAM_TEMPS];  /* declared lazily by get_temp(); the first MAX_NUM_FRAGMENT_REGISTERS_ATI entries are the ATI registers, the entries right after them are used as scratch */
     43    struct ureg_src *constants;                /* heap array with one entry per program parameter; allocated and freed by st_translate_atifs_program() */
     44    struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];  /* only outputs[0] is declared in this file */
     45    struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];    /* declared fragment shader inputs, looked up through inputMapping */
     46    struct ureg_src samplers[PIPE_MAX_SAMPLERS];       /* indexed by the destination register number of the sampling instruction */
     47 
     48    const GLuint *inputMapping;   /* VARYING_SLOT_* -> index into inputs[] */
     49    const GLuint *outputMapping;  /* FRAG_RESULT_* -> index into outputs[] */
     50 
     51    unsigned current_pass;  /* pass currently being compiled; selects the row of regs_written */
     52 
     53    bool regs_written[MAX_NUM_PASSES_ATI][MAX_NUM_FRAGMENT_REGISTERS_ATI];  /* [pass][reg] is set once a setup or arithmetic instruction wrote the register in that pass */
     54 
     55    boolean error;  /* only read (for a debug message) at the end of translation; nothing visible in this file sets it */
     56 };
     57 
     58 struct instruction_desc {
     59    unsigned TGSI_opcode;
     60    const char *name;
     61    unsigned char arg_count;
     62 };
     63 
     64 static const struct instruction_desc inst_desc[] = {
     65    {TGSI_OPCODE_MOV, "MOV", 1},
     66    {TGSI_OPCODE_NOP, "UND", 0}, /* unused */
     67    {TGSI_OPCODE_ADD, "ADD", 2},
     68    {TGSI_OPCODE_MUL, "MUL", 2},
     69    {TGSI_OPCODE_NOP, "SUB", 2},
     70    {TGSI_OPCODE_DP3, "DOT3", 2},
     71    {TGSI_OPCODE_DP4, "DOT4", 2},
     72    {TGSI_OPCODE_MAD, "MAD", 3},
     73    {TGSI_OPCODE_LRP, "LERP", 3},
     74    {TGSI_OPCODE_NOP, "CND", 3},
     75    {TGSI_OPCODE_NOP, "CND0", 3},
     76    {TGSI_OPCODE_NOP, "DOT2_ADD", 3}
     77 };
     78 
     79 static struct ureg_dst
     80 get_temp(struct st_translate *t, unsigned index)
     81 {
     82    if (ureg_dst_is_undef(t->temps[index]))
     83       t->temps[index] = ureg_DECL_temporary(t->ureg);
     84    return t->temps[index];
     85 }
     86 
     87 static struct ureg_src
     88 apply_swizzle(struct st_translate *t,
     89               struct ureg_src src, GLuint swizzle)
     90 {
     91    if (swizzle == GL_SWIZZLE_STR_ATI) {
     92       return src;
     93    } else if (swizzle == GL_SWIZZLE_STQ_ATI) {
     94       return ureg_swizzle(src,
     95                           TGSI_SWIZZLE_X,
     96                           TGSI_SWIZZLE_Y,
     97                           TGSI_SWIZZLE_W,
     98                           TGSI_SWIZZLE_Z);
     99    } else {
    100       struct ureg_dst tmp[2];
    101       struct ureg_src imm[3];
    102 
    103       tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
    104       tmp[1] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 1);
    105       imm[0] = src;
    106       imm[1] = ureg_imm4f(t->ureg, 1.0f, 1.0f, 0.0f, 0.0f);
    107       imm[2] = ureg_imm4f(t->ureg, 0.0f, 0.0f, 1.0f, 1.0f);
    108       ureg_insn(t->ureg, TGSI_OPCODE_MAD, &tmp[0], 1, imm, 3);
    109 
    110       if (swizzle == GL_SWIZZLE_STR_DR_ATI) {
    111          imm[0] = ureg_scalar(src, TGSI_SWIZZLE_Z);
    112       } else {
    113          imm[0] = ureg_scalar(src, TGSI_SWIZZLE_W);
    114       }
    115       ureg_insn(t->ureg, TGSI_OPCODE_RCP, &tmp[1], 1, &imm[0], 1);
    116 
    117       imm[0] = ureg_src(tmp[0]);
    118       imm[1] = ureg_src(tmp[1]);
    119       ureg_insn(t->ureg, TGSI_OPCODE_MUL, &tmp[0], 1, imm, 2);
    120 
    121       return ureg_src(tmp[0]);
    122    }
    123 }
    124 
    125 static struct ureg_src
    126 get_source(struct st_translate *t, GLuint src_type)
    127 {
    128    if (src_type >= GL_REG_0_ATI && src_type <= GL_REG_5_ATI) {
    129       if (t->regs_written[t->current_pass][src_type - GL_REG_0_ATI]) {
    130          return ureg_src(get_temp(t, src_type - GL_REG_0_ATI));
    131       } else {
    132          return ureg_imm1f(t->ureg, 0.0f);
    133       }
    134    } else if (src_type >= GL_CON_0_ATI && src_type <= GL_CON_7_ATI) {
    135       return t->constants[src_type - GL_CON_0_ATI];
    136    } else if (src_type == GL_ZERO) {
    137       return ureg_imm1f(t->ureg, 0.0f);
    138    } else if (src_type == GL_ONE) {
    139       return ureg_imm1f(t->ureg, 1.0f);
    140    } else if (src_type == GL_PRIMARY_COLOR_ARB) {
    141       return t->inputs[t->inputMapping[VARYING_SLOT_COL0]];
    142    } else if (src_type == GL_SECONDARY_INTERPOLATOR_ATI) {
    143       return t->inputs[t->inputMapping[VARYING_SLOT_COL1]];
    144    } else {
    145       /* frontend prevents this */
    146       unreachable("unknown source");
    147    }
    148 }
    149 
    150 static struct ureg_src
    151 prepare_argument(struct st_translate *t, const unsigned argId,
    152                  const struct atifragshader_src_register *srcReg)
    153 {
    154    struct ureg_src src = get_source(t, srcReg->Index);
    155    struct ureg_dst arg = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + argId);
    156 
    157    switch (srcReg->argRep) {
    158    case GL_NONE:
    159       break;
    160    case GL_RED:
    161       src = ureg_scalar(src, TGSI_SWIZZLE_X);
    162       break;
    163    case GL_GREEN:
    164       src = ureg_scalar(src, TGSI_SWIZZLE_Y);
    165       break;
    166    case GL_BLUE:
    167       src = ureg_scalar(src, TGSI_SWIZZLE_Z);
    168       break;
    169    case GL_ALPHA:
    170       src = ureg_scalar(src, TGSI_SWIZZLE_W);
    171       break;
    172    }
    173    ureg_insn(t->ureg, TGSI_OPCODE_MOV, &arg, 1, &src, 1);
    174 
    175    if (srcReg->argMod & GL_COMP_BIT_ATI) {
    176       struct ureg_src modsrc[2];
    177       modsrc[0] = ureg_imm1f(t->ureg, 1.0f);
    178       modsrc[1] = ureg_negate(ureg_src(arg));
    179 
    180       ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2);
    181    }
    182    if (srcReg->argMod & GL_BIAS_BIT_ATI) {
    183       struct ureg_src modsrc[2];
    184       modsrc[0] = ureg_src(arg);
    185       modsrc[1] = ureg_imm1f(t->ureg, -0.5f);
    186 
    187       ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2);
    188    }
    189    if (srcReg->argMod & GL_2X_BIT_ATI) {
    190       struct ureg_src modsrc[2];
    191       modsrc[0] = ureg_src(arg);
    192       modsrc[1] = ureg_src(arg);
    193 
    194       ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2);
    195    }
    196    if (srcReg->argMod & GL_NEGATE_BIT_ATI) {
    197       struct ureg_src modsrc[2];
    198       modsrc[0] = ureg_src(arg);
    199       modsrc[1] = ureg_imm1f(t->ureg, -1.0f);
    200 
    201       ureg_insn(t->ureg, TGSI_OPCODE_MUL, &arg, 1, modsrc, 2);
    202    }
    203    return  ureg_src(arg);
    204 }
    205 
    206 /* These instructions need special treatment */
    207 static void
    208 emit_special_inst(struct st_translate *t, const struct instruction_desc *desc,
    209                   struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
    210 {
    211    struct ureg_dst tmp[1];
    212    struct ureg_src src[3];
    213 
    214    if (!strcmp(desc->name, "SUB")) {
    215       ureg_ADD(t->ureg, *dst, args[0], ureg_negate(args[1]));
    216    } else if (!strcmp(desc->name, "CND")) {
    217       tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 2); /* re-purpose a3 */
    218       src[0] = ureg_imm1f(t->ureg, 0.5f);
    219       src[1] = ureg_negate(args[2]);
    220       ureg_insn(t->ureg, TGSI_OPCODE_ADD, tmp, 1, src, 2);
    221       src[0] = ureg_src(tmp[0]);
    222       src[1] = args[0];
    223       src[2] = args[1];
    224       ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3);
    225    } else if (!strcmp(desc->name, "CND0")) {
    226       src[0] = args[2];
    227       src[1] = args[1];
    228       src[2] = args[0];
    229       ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3);
    230    } else if (!strcmp(desc->name, "DOT2_ADD")) {
    231       /* note: DP2A is not implemented in most pipe drivers */
    232       tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); /* re-purpose a1 */
    233       src[0] = args[0];
    234       src[1] = args[1];
    235       ureg_insn(t->ureg, TGSI_OPCODE_DP2, tmp, 1, src, 2);
    236       src[0] = ureg_src(tmp[0]);
    237       src[1] = ureg_scalar(args[2], TGSI_SWIZZLE_Z);
    238       ureg_insn(t->ureg, TGSI_OPCODE_ADD, dst, 1, src, 2);
    239    }
    240 }
    241 
    242 static void
    243 emit_arith_inst(struct st_translate *t,
    244                 const struct instruction_desc *desc,
    245                 struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
    246 {
    247    if (desc->TGSI_opcode == TGSI_OPCODE_NOP) {
    248       return emit_special_inst(t, desc, dst, args, argcount);
    249    }
    250 
    251    ureg_insn(t->ureg, desc->TGSI_opcode, dst, 1, args, argcount);
    252 }
    253 
    254 static void
    255 emit_dstmod(struct st_translate *t,
    256             struct ureg_dst dst, GLuint dstMod)
    257 {
    258    float imm;
    259    struct ureg_src src[3];
    260    GLuint scale = dstMod & ~GL_SATURATE_BIT_ATI;
    261 
    262    if (dstMod == GL_NONE) {
    263       return;
    264    }
    265 
    266    switch (scale) {
    267    case GL_2X_BIT_ATI:
    268       imm = 2.0f;
    269       break;
    270    case GL_4X_BIT_ATI:
    271       imm = 4.0f;
    272       break;
    273    case GL_8X_BIT_ATI:
    274       imm = 8.0f;
    275       break;
    276    case GL_HALF_BIT_ATI:
    277       imm = 0.5f;
    278       break;
    279    case GL_QUARTER_BIT_ATI:
    280       imm = 0.25f;
    281       break;
    282    case GL_EIGHTH_BIT_ATI:
    283       imm = 0.125f;
    284       break;
    285    default:
    286       imm = 1.0f;
    287    }
    288 
    289    src[0] = ureg_src(dst);
    290    src[1] = ureg_imm1f(t->ureg, imm);
    291    if (dstMod & GL_SATURATE_BIT_ATI) {
    292       dst = ureg_saturate(dst);
    293    }
    294    ureg_insn(t->ureg, TGSI_OPCODE_MUL, &dst, 1, src, 2);
    295 }
    296 
    297 /**
    298  * Compile one setup instruction to TGSI instructions.
    299  */
    300 static void
    301 compile_setupinst(struct st_translate *t,
    302                   const unsigned r,
    303                   const struct atifs_setupinst *texinst)
    304 {
    305    struct ureg_dst dst[1];
    306    struct ureg_src src[2];
    307 
    308    if (!texinst->Opcode)
    309       return;
    310 
    311    dst[0] = get_temp(t, r);
    312 
    313    GLuint pass_tex = texinst->src;
    314 
    315    if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
    316       unsigned attr = pass_tex - GL_TEXTURE0_ARB + VARYING_SLOT_TEX0;
    317 
    318       src[0] = t->inputs[t->inputMapping[attr]];
    319    } else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
    320       unsigned reg = pass_tex - GL_REG_0_ATI;
    321 
    322       /* the frontend already validated that REG is only allowed in second pass */
    323       if (t->regs_written[0][reg]) {
    324          src[0] = ureg_src(t->temps[reg]);
    325       } else {
    326          src[0] = ureg_imm1f(t->ureg, 0.0f);
    327       }
    328    }
    329    src[0] = apply_swizzle(t, src[0], texinst->swizzle);
    330 
    331    if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
    332       /* by default texture and sampler indexes are the same */
    333       src[1] = t->samplers[r];
    334       /* the texture target is still unknown, it will be fixed in the draw call */
    335       ureg_tex_insn(t->ureg, TGSI_OPCODE_TEX, dst, 1, TGSI_TEXTURE_2D,
    336                     NULL, 0, src, 2);
    337    } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
    338       ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1);
    339    }
    340 
    341    t->regs_written[t->current_pass][r] = true;
    342 }
    343 
    344 /**
    345  * Compile one arithmetic operation COLOR&ALPHA pair into TGSI instructions.
    346  */
    347 static void
    348 compile_instruction(struct st_translate *t,
    349                     const struct atifs_instruction *inst)
    350 {
    351    unsigned optype;
    352 
    353    for (optype = 0; optype < 2; optype++) { /* color, alpha */
    354       const struct instruction_desc *desc;
    355       struct ureg_dst dst[1];
    356       struct ureg_src args[3]; /* arguments for the main operation */
    357       unsigned arg;
    358       unsigned dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI;
    359 
    360       if (!inst->Opcode[optype])
    361          continue;
    362 
    363       desc = &inst_desc[inst->Opcode[optype] - GL_MOV_ATI];
    364 
    365       /* prepare the arguments */
    366       for (arg = 0; arg < desc->arg_count; arg++) {
    367          if (arg >= inst->ArgCount[optype]) {
    368             _mesa_warning(0, "Using 0 for missing argument %d of %s\n",
    369                           arg, desc->name);
    370             args[arg] = ureg_imm1f(t->ureg, 0.0f);
    371          } else {
    372             args[arg] = prepare_argument(t, arg,
    373                                          &inst->SrcReg[optype][arg]);
    374          }
    375       }
    376 
    377       /* prepare dst */
    378       dst[0] = get_temp(t, dstreg);
    379 
    380       if (optype) {
    381          dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_W);
    382       } else {
    383          GLuint dstMask = inst->DstReg[optype].dstMask;
    384          if (dstMask == GL_NONE) {
    385             dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ);
    386          } else {
    387             dst[0] = ureg_writemask(dst[0], dstMask); /* the enum values match */
    388          }
    389       }
    390 
    391       /* emit the main instruction */
    392       emit_arith_inst(t, desc, dst, args, arg);
    393 
    394       emit_dstmod(t, *dst, inst->DstReg[optype].dstMod);
    395 
    396       t->regs_written[t->current_pass][dstreg] = true;
    397    }
    398 }
    399 
    400 static void
    401 finalize_shader(struct st_translate *t, unsigned numPasses)
    402 {
    403    struct ureg_dst dst[1] = { { 0 } };
    404    struct ureg_src src[1] = { { 0 } };
    405 
    406    if (t->regs_written[numPasses-1][0]) {
    407       /* copy the result into the OUT slot */
    408       dst[0] = t->outputs[t->outputMapping[FRAG_RESULT_COLOR]];
    409       src[0] = ureg_src(t->temps[0]);
    410       ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1);
    411    }
    412 
    413    /* signal the end of the program */
    414    ureg_insn(t->ureg, TGSI_OPCODE_END, dst, 0, src, 0);
    415 }
    416 
    417 /**
    418  * Called when a new variant is needed, we need to translate
    419  * the ATI fragment shader to TGSI
    420  */
    421 enum pipe_error
    422 st_translate_atifs_program(
    423    struct ureg_program *ureg,
    424    struct ati_fragment_shader *atifs,
    425    struct gl_program *program,
    426    GLuint numInputs,
    427    const GLuint inputMapping[],
    428    const ubyte inputSemanticName[],
    429    const ubyte inputSemanticIndex[],
    430    const GLuint interpMode[],
    431    GLuint numOutputs,
    432    const GLuint outputMapping[],
    433    const ubyte outputSemanticName[],
    434    const ubyte outputSemanticIndex[])
    435 {
    436    enum pipe_error ret = PIPE_OK;
    437 
    438    unsigned pass, i, r;
    439 
    440    struct st_translate translate, *t;
    441    t = &translate;
    442    memset(t, 0, sizeof *t);
    443 
    444    t->inputMapping = inputMapping;
    445    t->outputMapping = outputMapping;
    446    t->ureg = ureg;
    447    t->atifs = atifs;
    448 
    449    /*
    450     * Declare input attributes.
    451     */
    452    for (i = 0; i < numInputs; i++) {
    453       t->inputs[i] = ureg_DECL_fs_input(ureg,
    454                                         inputSemanticName[i],
    455                                         inputSemanticIndex[i],
    456                                         interpMode[i]);
    457    }
    458 
    459    /*
    460     * Declare output attributes:
    461     *  we always have numOutputs=1 and it's FRAG_RESULT_COLOR
    462     */
    463    t->outputs[0] = ureg_DECL_output(ureg,
    464                                     TGSI_SEMANTIC_COLOR,
    465                                     outputSemanticIndex[0]);
    466 
    467    /* Emit constants and immediates.  Mesa uses a single index space
    468     * for these, so we put all the translated regs in t->constants.
    469     */
    470    if (program->Parameters) {
    471       t->constants = calloc(program->Parameters->NumParameters,
    472                             sizeof t->constants[0]);
    473       if (t->constants == NULL) {
    474          ret = PIPE_ERROR_OUT_OF_MEMORY;
    475          goto out;
    476       }
    477 
    478       for (i = 0; i < program->Parameters->NumParameters; i++) {
    479          switch (program->Parameters->Parameters[i].Type) {
    480          case PROGRAM_STATE_VAR:
    481          case PROGRAM_UNIFORM:
    482             t->constants[i] = ureg_DECL_constant(ureg, i);
    483             break;
    484          case PROGRAM_CONSTANT:
    485             t->constants[i] =
    486                ureg_DECL_immediate(ureg,
    487                                    (const float*)program->Parameters->ParameterValues[i],
    488                                    4);
    489             break;
    490          default:
    491             break;
    492          }
    493       }
    494    }
    495 
    496    /* texture samplers */
    497    for (i = 0; i < MAX_NUM_FRAGMENT_REGISTERS_ATI; i++) {
    498       if (program->SamplersUsed & (1 << i)) {
    499          t->samplers[i] = ureg_DECL_sampler(ureg, i);
    500          /* the texture target is still unknown, it will be fixed in the draw call */
    501          ureg_DECL_sampler_view(ureg, i, TGSI_TEXTURE_2D,
    502                                 TGSI_RETURN_TYPE_FLOAT,
    503                                 TGSI_RETURN_TYPE_FLOAT,
    504                                 TGSI_RETURN_TYPE_FLOAT,
    505                                 TGSI_RETURN_TYPE_FLOAT);
    506       }
    507    }
    508 
    509    /* emit instructions */
    510    for (pass = 0; pass < atifs->NumPasses; pass++) {
    511       t->current_pass = pass;
    512       for (r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
    513          struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
    514          compile_setupinst(t, r, texinst);
    515       }
    516       for (i = 0; i < atifs->numArithInstr[pass]; i++) {
    517          struct atifs_instruction *inst = &atifs->Instructions[pass][i];
    518          compile_instruction(t, inst);
    519       }
    520    }
    521 
    522    finalize_shader(t, atifs->NumPasses);
    523 
    524 out:
    525    free(t->constants);
    526 
    527    if (t->error) {
    528       debug_printf("%s: translate error flag set\n", __func__);
    529    }
    530 
    531    return ret;
    532 }
    533 
    534 /**
    535  * Called in ProgramStringNotify, we need to fill the metadata of the
    536  * gl_program attached to the ati_fragment_shader
    537  */
    538 void
    539 st_init_atifs_prog(struct gl_context *ctx, struct gl_program *prog)
    540 {
    541    /* we know this is st_fragment_program, because of st_new_ati_fs() */
    542    struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
    543    struct ati_fragment_shader *atifs = stfp->ati_fs;
    544 
    545    unsigned pass, i, r, optype, arg;
    546 
    547    static const gl_state_index fog_params_state[STATE_LENGTH] =
    548       {STATE_INTERNAL, STATE_FOG_PARAMS_OPTIMIZED, 0, 0, 0};
    549    static const gl_state_index fog_color[STATE_LENGTH] =
    550       {STATE_FOG_COLOR, 0, 0, 0, 0};
    551 
    552    prog->info.inputs_read = 0;
    553    prog->info.outputs_written = BITFIELD64_BIT(FRAG_RESULT_COLOR);
    554    prog->SamplersUsed = 0;
    555    prog->Parameters = _mesa_new_parameter_list();
    556 
    557    /* fill in inputs_read, SamplersUsed, TexturesUsed */
    558    for (pass = 0; pass < atifs->NumPasses; pass++) {
    559       for (r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
    560          struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
    561          GLuint pass_tex = texinst->src;
    562 
    563          if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
    564             /* mark which texcoords are used */
    565             prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB);
    566             /* by default there is 1:1 mapping between samplers and textures */
    567             prog->SamplersUsed |= (1 << r);
    568             /* the target is unknown here, it will be fixed in the draw call */
    569             prog->TexturesUsed[r] = TEXTURE_2D_BIT;
    570          } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
    571             if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
    572                prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB);
    573             }
    574          }
    575       }
    576    }
    577    for (pass = 0; pass < atifs->NumPasses; pass++) {
    578       for (i = 0; i < atifs->numArithInstr[pass]; i++) {
    579          struct atifs_instruction *inst = &atifs->Instructions[pass][i];
    580 
    581          for (optype = 0; optype < 2; optype++) { /* color, alpha */
    582             if (inst->Opcode[optype]) {
    583                for (arg = 0; arg < inst->ArgCount[optype]; arg++) {
    584                   GLint index = inst->SrcReg[optype][arg].Index;
    585                   if (index == GL_PRIMARY_COLOR_EXT) {
    586                      prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_COL0);
    587                   } else if (index == GL_SECONDARY_INTERPOLATOR_ATI) {
    588                      /* note: ATI_fragment_shader.txt never specifies what
    589                       * GL_SECONDARY_INTERPOLATOR_ATI is, swrast uses
    590                       * VARYING_SLOT_COL1 for this input */
    591                      prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_COL1);
    592                   }
    593                }
    594             }
    595          }
    596       }
    597    }
    598    /* we may need fog */
    599    prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_FOGC);
    600 
    601    /* we always have the ATI_fs constants, and the fog params */
    602    for (i = 0; i < MAX_NUM_FRAGMENT_CONSTANTS_ATI; i++) {
    603       _mesa_add_parameter(prog->Parameters, PROGRAM_UNIFORM,
    604                           NULL, 4, GL_FLOAT, NULL, NULL);
    605    }
    606    _mesa_add_state_reference(prog->Parameters, fog_params_state);
    607    _mesa_add_state_reference(prog->Parameters, fog_color);
    608 
    609    prog->arb.NumInstructions = 0;
    610    prog->arb.NumTemporaries = MAX_NUM_FRAGMENT_REGISTERS_ATI + 3; /* 3 input temps for arith ops */
    611    prog->arb.NumParameters = MAX_NUM_FRAGMENT_CONSTANTS_ATI + 2; /* 2 state variables for fog */
    612 }
    613 
    614 
    615 struct tgsi_atifs_transform {  /* state of the TGSI transform pass that fixes texture targets and adds fog */
    616    struct tgsi_transform_context base;  /* must stay the first member: tgsi_atifs_transform() casts a pointer to it into a pointer to this struct */
    617    struct tgsi_shader_info info;        /* info about the shader being transformed; file_max, immediate_count and the input semantics are read from it */
    618    const struct st_fp_variant_key *key; /* variant key; supplies texture_targets[] and the fog mode */
    619    bool first_instruction_emitted;      /* set on the first transform_instr() call so the fog declarations are added only once */
    620    unsigned fog_factor_temp;            /* index of the temporary declared for the fog factor */
    621    unsigned fog_clamp_imm;              /* index of the immediate declared for the fog clamp */
    622 };
    623 
    624 static inline struct tgsi_atifs_transform *
    625 tgsi_atifs_transform(struct tgsi_transform_context *tctx)
    626 {
    627    return (struct tgsi_atifs_transform *)tctx;
    628 }
    629 
    630 /* copied from st_cb_drawpixels_shader.c */
    631 static void
    632 set_src(struct tgsi_full_instruction *inst, unsigned i, unsigned file, unsigned index,
    633         unsigned x, unsigned y, unsigned z, unsigned w)
    634 {
    635    inst->Src[i].Register.File  = file;
    636    inst->Src[i].Register.Index = index;
    637    inst->Src[i].Register.SwizzleX = x;
    638    inst->Src[i].Register.SwizzleY = y;
    639    inst->Src[i].Register.SwizzleZ = z;
    640    inst->Src[i].Register.SwizzleW = w;
    641 }
    642 
    643 #define SET_SRC(inst, i, file, index, x, y, z, w) \
    644    set_src(inst, i, file, index, TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, \
    645            TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w)
    646 
    647 static void
    648 transform_decl(struct tgsi_transform_context *tctx,
    649                struct tgsi_full_declaration *decl)
    650 {
    651    struct tgsi_atifs_transform *ctx = tgsi_atifs_transform(tctx);
    652 
    653    if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
    654       /* fix texture target */
    655       unsigned newtarget = ctx->key->texture_targets[decl->Range.First];
    656       if (newtarget)
    657          decl->SamplerView.Resource = newtarget;
    658    }
    659 
    660    tctx->emit_declaration(tctx, decl);
    661 }
    662 
    663 static void
    664 transform_instr(struct tgsi_transform_context *tctx,
    665                 struct tgsi_full_instruction *current_inst)
    666 {
    667    struct tgsi_atifs_transform *ctx = tgsi_atifs_transform(tctx);
    668 
    669    if (ctx->first_instruction_emitted)
    670       goto transform_inst;
    671 
    672    ctx->first_instruction_emitted = true;
    673 
    674    if (ctx->key->fog) {
    675       /* add a new temp for the fog factor */
    676       ctx->fog_factor_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 1;
    677       tgsi_transform_temp_decl(tctx, ctx->fog_factor_temp);
    678 
    679       /* add immediates for clamp */
    680       ctx->fog_clamp_imm = ctx->info.immediate_count;
    681       tgsi_transform_immediate_decl(tctx, 1.0f, 0.0f, 0.0f, 0.0f);
    682    }
    683 
    684 transform_inst:
    685    if (current_inst->Instruction.Opcode == TGSI_OPCODE_TEX) {
    686       /* fix texture target */
    687       unsigned newtarget = ctx->key->texture_targets[current_inst->Src[1].Register.Index];
    688       if (newtarget)
    689          current_inst->Texture.Texture = newtarget;
    690 
    691    } else if (ctx->key->fog && current_inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
    692               current_inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
    693       struct tgsi_full_instruction inst;
    694       unsigned i;
    695       int fogc_index = -1;
    696       int reg0_index = current_inst->Src[0].Register.Index;
    697 
    698       /* find FOGC input */
    699       for (i = 0; i < ctx->info.num_inputs; i++) {
    700          if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FOG) {
    701             fogc_index = i;
    702             break;
    703          }
    704       }
    705       if (fogc_index < 0) {
    706          /* should never be reached, because fog coord input is always declared */
    707          tctx->emit_instruction(tctx, current_inst);
    708          return;
    709       }
    710 
    711       /* compute the 1 component fog factor f */
    712       if (ctx->key->fog == 1) {
    713          /* LINEAR formula: f = (end - z) / (end - start)
    714           * with optimized parameters:
    715           *    f = MAD(fogcoord, oparams.x, oparams.y)
    716           */
    717          inst = tgsi_default_full_instruction();
    718          inst.Instruction.Opcode = TGSI_OPCODE_MAD;
    719          inst.Instruction.NumDstRegs = 1;
    720          inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
    721          inst.Dst[0].Register.Index = ctx->fog_factor_temp;
    722          inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
    723          inst.Instruction.NumSrcRegs = 3;
    724          SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W);
    725          SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, X, X, X, X);
    726          SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, Y, Y, Y, Y);
    727          tctx->emit_instruction(tctx, &inst);
    728       } else if (ctx->key->fog == 2) {
    729          /* EXP formula: f = exp(-dens * z)
    730           * with optimized parameters:
    731           *    f = MUL(fogcoord, oparams.z); f= EX2(-f)
    732           */
    733          inst = tgsi_default_full_instruction();
    734          inst.Instruction.Opcode = TGSI_OPCODE_MUL;
    735          inst.Instruction.NumDstRegs = 1;
    736          inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
    737          inst.Dst[0].Register.Index = ctx->fog_factor_temp;
    738          inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
    739          inst.Instruction.NumSrcRegs = 2;
    740          SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W);
    741          SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, Z, Z, Z, Z);
    742          tctx->emit_instruction(tctx, &inst);
    743 
    744          inst = tgsi_default_full_instruction();
    745          inst.Instruction.Opcode = TGSI_OPCODE_EX2;
    746          inst.Instruction.NumDstRegs = 1;
    747          inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
    748          inst.Dst[0].Register.Index = ctx->fog_factor_temp;
    749          inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
    750          inst.Instruction.NumSrcRegs = 1;
    751          SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
    752          inst.Src[0].Register.Negate = 1;
    753          tctx->emit_instruction(tctx, &inst);
    754       } else if (ctx->key->fog == 3) {
    755          /* EXP2 formula: f = exp(-(dens * z)^2)
    756           * with optimized parameters:
    757           *    f = MUL(fogcoord, oparams.w); f=MUL(f, f); f= EX2(-f)
    758           */
    759          inst = tgsi_default_full_instruction();
    760          inst.Instruction.Opcode = TGSI_OPCODE_MUL;
    761          inst.Instruction.NumDstRegs = 1;
    762          inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
    763          inst.Dst[0].Register.Index = ctx->fog_factor_temp;
    764          inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
    765          inst.Instruction.NumSrcRegs = 2;
    766          SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W);
    767          SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, W, W, W, W);
    768          tctx->emit_instruction(tctx, &inst);
    769 
    770          inst = tgsi_default_full_instruction();
    771          inst.Instruction.Opcode = TGSI_OPCODE_MUL;
    772          inst.Instruction.NumDstRegs = 1;
    773          inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
    774          inst.Dst[0].Register.Index = ctx->fog_factor_temp;
    775          inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
    776          inst.Instruction.NumSrcRegs = 2;
    777          SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
    778          SET_SRC(&inst, 1, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
    779          tctx->emit_instruction(tctx, &inst);
    780 
    781          inst = tgsi_default_full_instruction();
    782          inst.Instruction.Opcode = TGSI_OPCODE_EX2;
    783          inst.Instruction.NumDstRegs = 1;
    784          inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
    785          inst.Dst[0].Register.Index = ctx->fog_factor_temp;
    786          inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
    787          inst.Instruction.NumSrcRegs = 1;
    788          SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
    789          inst.Src[0].Register.Negate ^= 1;
    790          tctx->emit_instruction(tctx, &inst);
    791       }
    792       /* f = CLAMP(f, 0.0, 1.0) */
    793       inst = tgsi_default_full_instruction();
    794       inst.Instruction.Opcode = TGSI_OPCODE_CLAMP;
    795       inst.Instruction.NumDstRegs = 1;
    796       inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
    797       inst.Dst[0].Register.Index = ctx->fog_factor_temp;
    798       inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
    799       inst.Instruction.NumSrcRegs = 3;
    800       SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
    801       SET_SRC(&inst, 1, TGSI_FILE_IMMEDIATE, ctx->fog_clamp_imm, Y, Y, Y, Y); // 0.0
    802       SET_SRC(&inst, 2, TGSI_FILE_IMMEDIATE, ctx->fog_clamp_imm, X, X, X, X); // 1.0
    803       tctx->emit_instruction(tctx, &inst);
    804 
    805       /* REG0 = LRP(f, REG0, fogcolor) */
    806       inst = tgsi_default_full_instruction();
    807       inst.Instruction.Opcode = TGSI_OPCODE_LRP;
    808       inst.Instruction.NumDstRegs = 1;
    809       inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
    810       inst.Dst[0].Register.Index = reg0_index;
    811       inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
    812       inst.Instruction.NumSrcRegs = 3;
    813       SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, X, X, Y);
    814       SET_SRC(&inst, 1, TGSI_FILE_TEMPORARY, reg0_index, X, Y, Z, W);
    815       SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI + 1, X, Y, Z, W);
    816       tctx->emit_instruction(tctx, &inst);
    817    }
    818 
    819    tctx->emit_instruction(tctx, current_inst);
    820 }
    821 
    822 /*
    823  * A post-process step in the draw call to fix texture targets and
    824  * insert code for fog.
    825  */
    826 const struct tgsi_token *
    827 st_fixup_atifs(const struct tgsi_token *tokens,
    828                const struct st_fp_variant_key *key)
    829 {
    830    struct tgsi_atifs_transform ctx;
    831    struct tgsi_token *newtoks;
    832    int newlen;
    833 
    834    memset(&ctx, 0, sizeof(ctx));
    835    ctx.base.transform_declaration = transform_decl;
    836    ctx.base.transform_instruction = transform_instr;
    837    ctx.key = key;
    838    tgsi_scan_shader(tokens, &ctx.info);
    839 
    840    newlen = tgsi_num_tokens(tokens) + 30;
    841    newtoks = tgsi_alloc_tokens(newlen);
    842    if (!newtoks)
    843       return NULL;
    844 
    845    tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
    846    return newtoks;
    847 }
    848 
    849