Home | History | Annotate | Download | only in program
      1 /*
      2  * Copyright  2015 Intel Corporation
      3  * Copyright  2014-2015 Broadcom
      4  * Copyright (C) 2014 Rob Clark <robclark (at) freedesktop.org>
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the next
     14  * paragraph) shall be included in all copies or substantial portions of the
     15  * Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     23  * IN THE SOFTWARE.
     24  */
     25 
     26 #include "compiler/nir/nir.h"
     27 #include "compiler/nir/nir_builder.h"
     28 #include "compiler/glsl/list.h"
     29 #include "main/imports.h"
     30 #include "util/ralloc.h"
     31 
     32 #include "prog_to_nir.h"
     33 #include "prog_instruction.h"
     34 #include "prog_parameter.h"
     35 #include "prog_print.h"
     36 #include "program.h"
     37 
     38 /**
     39  * \file prog_to_nir.c
     40  *
     41  * A translator from Mesa IR (prog_instruction.h) to NIR.  This is primarily
     42  * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
     43  * vertex processing.  Full GLSL support should use glsl_to_nir instead.
     44  */
     45 
/**
 * Per-translation state shared by the helpers in this file.
 */
struct ptn_compile {
   /* Mesa IR program being translated; its Parameters list is consulted when
    * resolving constant and state-variable sources.
    */
   const struct gl_program *prog;
   /* NIR builder through which every instruction is inserted. */
   nir_builder build;
   /* When set, ptn_emit_instruction() returns before translating the opcode. */
   bool error;

   /* Variable dereferenced (as an array of vec4s) by PROGRAM_STATE_VAR and
    * non-immediate PROGRAM_CONSTANT loads.
    */
   nir_variable *parameters;
   /* Input variables, indexed by the Index of a PROGRAM_INPUT source. */
   nir_variable *input_vars[VARYING_SLOT_MAX];
   /* Output variables, indexed by var->data.location when storing outputs. */
   nir_variable *output_vars[VARYING_SLOT_MAX];
   /* Registers backing PROGRAM_OUTPUT destinations, indexed by Index. */
   nir_register **output_regs;
   /* Registers backing PROGRAM_TEMPORARY operands, indexed by Index. */
   nir_register **temp_regs;

   /* Register backing the PROGRAM_ADDRESS register (only index 0 is used). */
   nir_register *addr_reg;
};
     59 
/* Builds an anonymous unsigned[4] array holding the four named SWIZZLE_*
 * selectors, for use as a swizzle argument.
 */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
/* Swizzles src with channel ch in every position, asking nir_swizzle() for a
 * single component.
 */
#define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
     63 
     64 static nir_ssa_def *
     65 ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
     66 {
     67    nir_builder *b = &c->build;
     68 
     69    nir_alu_src src;
     70    memset(&src, 0, sizeof(src));
     71 
     72    if (dest->dest.is_ssa)
     73       src.src = nir_src_for_ssa(&dest->dest.ssa);
     74    else {
     75       assert(!dest->dest.reg.indirect);
     76       src.src = nir_src_for_reg(dest->dest.reg.reg);
     77       src.src.reg.base_offset = dest->dest.reg.base_offset;
     78    }
     79 
     80    for (int i = 0; i < 4; i++)
     81       src.swizzle[i] = i;
     82 
     83    return nir_fmov_alu(b, src, 4);
     84 }
     85 
     86 static nir_alu_dest
     87 ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
     88 {
     89    nir_alu_dest dest;
     90 
     91    memset(&dest, 0, sizeof(dest));
     92 
     93    switch (prog_dst->File) {
     94    case PROGRAM_TEMPORARY:
     95       dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
     96       break;
     97    case PROGRAM_OUTPUT:
     98       dest.dest.reg.reg = c->output_regs[prog_dst->Index];
     99       break;
    100    case PROGRAM_ADDRESS:
    101       assert(prog_dst->Index == 0);
    102       dest.dest.reg.reg = c->addr_reg;
    103       break;
    104    case PROGRAM_UNDEFINED:
    105       break;
    106    }
    107 
    108    dest.write_mask = prog_dst->WriteMask;
    109    dest.saturate = false;
    110 
    111    assert(!prog_dst->RelAddr);
    112 
    113    return dest;
    114 }
    115 
/**
 * Translates a Mesa IR source operand into a 4-component NIR SSA value.
 *
 * The operand is first resolved to a NIR source according to its register
 * file, then its swizzle and negation are applied.  PROGRAM_UNDEFINED
 * operands short-circuit to an immediate 0.0.  Unknown register files print a
 * diagnostic to stderr and abort().
 *
 * \param c         translation state
 * \param prog_src  Mesa IR source register to translate
 * \return the SSA def holding the swizzled/negated operand.
 */
static nir_ssa_def *
ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
{
   nir_builder *b = &c->build;
   nir_alu_src src;

   memset(&src, 0, sizeof(src));

   switch (prog_src->File) {
   case PROGRAM_UNDEFINED:
      /* Unused operand slot: hand back a harmless immediate. */
      return nir_imm_float(b, 0.0);
   case PROGRAM_TEMPORARY:
      assert(!prog_src->RelAddr && prog_src->Index >= 0);
      src.src.reg.reg = c->temp_regs[prog_src->Index];
      break;
   case PROGRAM_INPUT: {
      /* ARB_vertex_program doesn't allow relative addressing on vertex
       * attributes; ARB_fragment_program has no relative addressing at all.
       */
      assert(!prog_src->RelAddr);

      assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);

      /* Inputs are read with a vec4 load_var of the matching input variable. */
      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
      load->num_components = 4;
      load->variables[0] = nir_deref_var_create(load, c->input_vars[prog_src->Index]);

      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_builder_instr_insert(b, &load->instr);

      src.src = nir_src_for_ssa(&load->dest.ssa);
      break;
   }
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT: {
      /* We actually want to look at the type in the Parameters list for this,
       * because it lets us upload constant builtin uniforms as actual
       * constants.
       */
      struct gl_program_parameter_list *plist = c->prog->Parameters;
      /* With relative addressing the Index is only an offset, so it cannot
       * be used to look up a single parameter's type; use the operand's file.
       */
      gl_register_file file = prog_src->RelAddr ? prog_src->File :
         plist->Parameters[prog_src->Index].Type;

      switch (file) {
      case PROGRAM_CONSTANT:
         /* Constants become immediates unless the program indirectly
          * addresses the constant file, in which case they are loaded from
          * the parameter array like state variables.
          */
         if ((c->prog->arb.IndirectRegisterFiles &
              (1 << PROGRAM_CONSTANT)) == 0) {
            float *v = (float *) plist->ParameterValues[prog_src->Index];
            src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
            break;
         }
         /* FALLTHROUGH */
      case PROGRAM_STATE_VAR: {
         assert(c->parameters != NULL);

         /* vec4 load_var of parameters[...], with an array deref chained
          * under the variable deref.
          */
         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
         nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
         load->num_components = 4;

         load->variables[0] = nir_deref_var_create(load, c->parameters);
         nir_deref_array *deref_arr =
            nir_deref_array_create(load->variables[0]);
         deref_arr->deref.type = glsl_vec4_type();
         load->variables[0]->deref.child = &deref_arr->deref;

         if (prog_src->RelAddr) {
            deref_arr->deref_array_type = nir_deref_array_type_indirect;

            /* The indirect index is component 0 of the address register. */
            nir_alu_src addr_src = { NIR_SRC_INIT };
            addr_src.src = nir_src_for_reg(c->addr_reg);
            nir_ssa_def *reladdr = nir_imov_alu(b, addr_src, 1);

            if (prog_src->Index < 0) {
               /* This is a negative offset which should be added to the address
                * register's value.
                */
               reladdr = nir_iadd(b, reladdr, nir_imm_int(b, prog_src->Index));

               deref_arr->base_offset = 0;
            } else {
               deref_arr->base_offset = prog_src->Index;
            }
            deref_arr->indirect = nir_src_for_ssa(reladdr);
         } else {
            deref_arr->deref_array_type = nir_deref_array_type_direct;
            deref_arr->base_offset = prog_src->Index;
         }

         nir_builder_instr_insert(b, &load->instr);

         src.src = nir_src_for_ssa(&load->dest.ssa);
         break;
      }
      default:
         fprintf(stderr, "bad uniform src register file: %s (%d)\n",
                 _mesa_register_file_name(file), file);
         abort();
      }
      break;
   }
   default:
      fprintf(stderr, "unknown src register file: %s (%d)\n",
              _mesa_register_file_name(prog_src->File), prog_src->File);
      abort();
   }

   nir_ssa_def *def;
   if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
       (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
      /* The simple non-SWZ case. */
      for (int i = 0; i < 4; i++)
         src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);

      def = nir_fmov_alu(b, src, 4);

      /* Negate is all-or-nothing here, so one vector fneg suffices. */
      if (prog_src->Negate)
         def = nir_fneg(b, def);
   } else {
      /* The SWZ instruction allows per-component zero/one swizzles, and also
       * per-component negation.
       */
      nir_ssa_def *chans[4];
      for (int i = 0; i < 4; i++) {
         int swizzle = GET_SWZ(prog_src->Swizzle, i);
         if (swizzle == SWIZZLE_ZERO) {
            chans[i] = nir_imm_float(b, 0.0);
         } else if (swizzle == SWIZZLE_ONE) {
            chans[i] = nir_imm_float(b, 1.0);
         } else {
            assert(swizzle != SWIZZLE_NIL);
            /* One-component fmov that picks the selected source channel. */
            nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
            mov->dest.write_mask = 0x1;
            mov->src[0] = src;
            mov->src[0].swizzle[0] = swizzle;
            nir_builder_instr_insert(b, &mov->instr);

            chans[i] = &mov->dest.dest.ssa;
         }

         /* Bit i of Negate negates output channel i. */
         if (prog_src->Negate & (1 << i))
            chans[i] = nir_fneg(b, chans[i]);
      }
      def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
   }

   return def;
}
    266 
    267 static void
    268 ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
    269 {
    270    unsigned num_srcs = nir_op_infos[op].num_inputs;
    271    nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
    272    unsigned i;
    273 
    274    for (i = 0; i < num_srcs; i++)
    275       instr->src[i].src = nir_src_for_ssa(src[i]);
    276 
    277    instr->dest = dest;
    278    nir_builder_instr_insert(b, &instr->instr);
    279 }
    280 
    281 static void
    282 ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
    283                      nir_ssa_def *def, unsigned write_mask)
    284 {
    285    if (!(dest.write_mask & write_mask))
    286       return;
    287 
    288    nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
    289    if (!mov)
    290       return;
    291 
    292    mov->dest = dest;
    293    mov->dest.write_mask &= write_mask;
    294    mov->src[0].src = nir_src_for_ssa(def);
    295    for (unsigned i = def->num_components; i < 4; i++)
    296       mov->src[0].swizzle[i] = def->num_components - 1;
    297    nir_builder_instr_insert(b, &mov->instr);
    298 }
    299 
    300 static void
    301 ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
    302 {
    303    ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
    304 }
    305 
    306 static void
    307 ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
    308 {
    309    ptn_move_dest(b, dest, nir_f2i(b, nir_ffloor(b, src[0])));
    310 }
    311 
    312 /* EXP - Approximate Exponential Base 2
    313  *  dst.x = 2^{\lfloor src.x\rfloor}
    314  *  dst.y = src.x - \lfloor src.x\rfloor
    315  *  dst.z = 2^{src.x}
    316  *  dst.w = 1.0
    317  */
    318 static void
    319 ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
    320 {
    321    nir_ssa_def *srcx = ptn_channel(b, src[0], X);
    322 
    323    ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
    324    ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
    325    ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
    326    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
    327 }
    328 
    329 /* LOG - Approximate Logarithm Base 2
    330  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
    331  *  dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
    332  *  dst.z = \log_2{|src.x|}
    333  *  dst.w = 1.0
    334  */
    335 static void
    336 ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
    337 {
    338    nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
    339    nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
    340    nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
    341 
    342    ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
    343    ptn_move_dest_masked(b, dest,
    344                         nir_fmul(b, abs_srcx,
    345                                  nir_fexp2(b, nir_fneg(b, floor_log2))),
    346                         WRITEMASK_Y);
    347    ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
    348    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
    349 }
    350 
    351 /* DST - Distance Vector
    352  *   dst.x = 1.0
    353  *   dst.y = src0.y \times src1.y
    354  *   dst.z = src0.z
    355  *   dst.w = src1.w
    356  */
    357 static void
    358 ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
    359 {
    360    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
    361    ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
    362    ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
    363    ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
    364 }
    365 
    366 /* LIT - Light Coefficients
    367  *  dst.x = 1.0
    368  *  dst.y = max(src.x, 0.0)
    369  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
    370  *  dst.w = 1.0
    371  */
    372 static void
    373 ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
    374 {
    375    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);
    376 
    377    ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
    378                                           nir_imm_float(b, 0.0)), WRITEMASK_Y);
    379 
    380    if (dest.write_mask & WRITEMASK_Z) {
    381       nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
    382       nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
    383                                                  nir_imm_float(b, 128.0)),
    384                                      nir_imm_float(b, -128.0));
    385       nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
    386                                   wclamp);
    387 
    388       nir_ssa_def *z;
    389       if (b->shader->options->native_integers) {
    390          z = nir_bcsel(b,
    391                        nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
    392                        nir_imm_float(b, 0.0),
    393                        pow);
    394       } else {
    395          z = nir_fcsel(b,
    396                        nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
    397                        nir_imm_float(b, 0.0),
    398                        pow);
    399       }
    400 
    401       ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
    402    }
    403 }
    404 
    405 /* SCS - Sine Cosine
    406  *   dst.x = \cos{src.x}
    407  *   dst.y = \sin{src.x}
    408  *   dst.z = 0.0
    409  *   dst.w = 1.0
    410  */
    411 static void
    412 ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
    413 {
    414    ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
    415                         WRITEMASK_X);
    416    ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
    417                         WRITEMASK_Y);
    418    ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
    419    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
    420 }
    421 
    422 /**
    423  * Emit SLT.  For platforms with integers, prefer b2f(flt(...)).
    424  */
    425 static void
    426 ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
    427 {
    428    if (b->shader->options->native_integers) {
    429       ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
    430    } else {
    431       ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
    432    }
    433 }
    434 
    435 /**
    436  * Emit SGE.  For platforms with integers, prefer b2f(fge(...)).
    437  */
    438 static void
    439 ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
    440 {
    441    if (b->shader->options->native_integers) {
    442       ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
    443    } else {
    444       ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
    445    }
    446 }
    447 
    448 static void
    449 ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
    450 {
    451    ptn_move_dest_masked(b, dest,
    452                         nir_fsub(b,
    453                                  nir_fmul(b,
    454                                           nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3, true),
    455                                           nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3, true)),
    456                                  nir_fmul(b,
    457                                           nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3, true),
    458                                           nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3, true))),
    459                         WRITEMASK_XYZ);
    460    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
    461 }
    462 
    463 static void
    464 ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
    465 {
    466    ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
    467 }
    468 
    469 static void
    470 ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
    471 {
    472    ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
    473 }
    474 
    475 static void
    476 ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
    477 {
    478    ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
    479 }
    480 
    481 static void
    482 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
    483 {
    484    ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
    485 }
    486 
    487 static void
    488 ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
    489 {
    490    if (b->shader->options->native_integers) {
    491       ptn_move_dest(b, dest, nir_bcsel(b,
    492                                        nir_flt(b, src[0], nir_imm_float(b, 0.0)),
    493                                        src[1], src[2]));
    494    } else {
    495       ptn_move_dest(b, dest, nir_fcsel(b,
    496                                        nir_slt(b, src[0], nir_imm_float(b, 0.0)),
    497                                        src[1], src[2]));
    498    }
    499 }
    500 
    501 static void
    502 ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
    503 {
    504    ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
    505 }
    506 
    507 static void
    508 ptn_kil(nir_builder *b, nir_ssa_def **src)
    509 {
    510    nir_ssa_def *cmp = b->shader->options->native_integers ?
    511       nir_bany_inequal4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_int(b, 0)) :
    512       nir_fany_nequal4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_float(b, 0.0));
    513 
    514    nir_intrinsic_instr *discard =
    515       nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
    516    discard->src[0] = nir_src_for_ssa(cmp);
    517    nir_builder_instr_insert(b, &discard->instr);
    518 }
    519 
    520 static void
    521 ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
    522         struct prog_instruction *prog_inst)
    523 {
    524    nir_tex_instr *instr;
    525    nir_texop op;
    526    unsigned num_srcs;
    527 
    528    switch (prog_inst->Opcode) {
    529    case OPCODE_TEX:
    530       op = nir_texop_tex;
    531       num_srcs = 1;
    532       break;
    533    case OPCODE_TXB:
    534       op = nir_texop_txb;
    535       num_srcs = 2;
    536       break;
    537    case OPCODE_TXD:
    538       op = nir_texop_txd;
    539       num_srcs = 3;
    540       break;
    541    case OPCODE_TXL:
    542       op = nir_texop_txl;
    543       num_srcs = 2;
    544       break;
    545    case OPCODE_TXP:
    546       op = nir_texop_tex;
    547       num_srcs = 2;
    548       break;
    549    default:
    550       fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
    551       abort();
    552    }
    553 
    554    if (prog_inst->TexShadow)
    555       num_srcs++;
    556 
    557    instr = nir_tex_instr_create(b->shader, num_srcs);
    558    instr->op = op;
    559    instr->dest_type = nir_type_float;
    560    instr->is_shadow = prog_inst->TexShadow;
    561    instr->texture_index = prog_inst->TexSrcUnit;
    562    instr->sampler_index = prog_inst->TexSrcUnit;
    563 
    564    switch (prog_inst->TexSrcTarget) {
    565    case TEXTURE_1D_INDEX:
    566       instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
    567       break;
    568    case TEXTURE_2D_INDEX:
    569       instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
    570       break;
    571    case TEXTURE_3D_INDEX:
    572       instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
    573       break;
    574    case TEXTURE_CUBE_INDEX:
    575       instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
    576       break;
    577    case TEXTURE_RECT_INDEX:
    578       instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
    579       break;
    580    default:
    581       fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
    582       abort();
    583    }
    584 
    585    switch (instr->sampler_dim) {
    586    case GLSL_SAMPLER_DIM_1D:
    587    case GLSL_SAMPLER_DIM_BUF:
    588       instr->coord_components = 1;
    589       break;
    590    case GLSL_SAMPLER_DIM_2D:
    591    case GLSL_SAMPLER_DIM_RECT:
    592    case GLSL_SAMPLER_DIM_EXTERNAL:
    593    case GLSL_SAMPLER_DIM_MS:
    594       instr->coord_components = 2;
    595       break;
    596    case GLSL_SAMPLER_DIM_3D:
    597    case GLSL_SAMPLER_DIM_CUBE:
    598       instr->coord_components = 3;
    599       break;
    600    case GLSL_SAMPLER_DIM_SUBPASS:
    601    case GLSL_SAMPLER_DIM_SUBPASS_MS:
    602       unreachable("can't reach");
    603    }
    604 
    605    unsigned src_number = 0;
    606 
    607    instr->src[src_number].src =
    608       nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
    609                                   instr->coord_components, true));
    610    instr->src[src_number].src_type = nir_tex_src_coord;
    611    src_number++;
    612 
    613    if (prog_inst->Opcode == OPCODE_TXP) {
    614       instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
    615       instr->src[src_number].src_type = nir_tex_src_projector;
    616       src_number++;
    617    }
    618 
    619    if (prog_inst->Opcode == OPCODE_TXB) {
    620       instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
    621       instr->src[src_number].src_type = nir_tex_src_bias;
    622       src_number++;
    623    }
    624 
    625    if (prog_inst->Opcode == OPCODE_TXL) {
    626       instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
    627       instr->src[src_number].src_type = nir_tex_src_lod;
    628       src_number++;
    629    }
    630 
    631    if (instr->is_shadow) {
    632       if (instr->coord_components < 3)
    633          instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
    634       else
    635          instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
    636 
    637       instr->src[src_number].src_type = nir_tex_src_comparator;
    638       src_number++;
    639    }
    640 
    641    assert(src_number == num_srcs);
    642 
    643    nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
    644    nir_builder_instr_insert(b, &instr->instr);
    645 
    646    /* Resolve the writemask on the texture op. */
    647    ptn_move_dest(b, dest, &instr->dest.ssa);
    648 }
    649 
/* Direct Mesa IR opcode -> NIR ALU opcode translation table, indexed by
 * Mesa opcode.
 *
 * An entry of 0 means the opcode has no one-to-one ALU equivalent here.
 * ptn_emit_instruction() handles most of those opcodes with dedicated cases
 * and only consults this table from its default case, where a 0 entry is
 * reported as an unknown opcode and aborts.
 */
static const nir_op op_trans[MAX_OPCODE] = {
   [OPCODE_NOP] = 0,
   [OPCODE_ABS] = nir_op_fabs,
   [OPCODE_ADD] = nir_op_fadd,
   [OPCODE_ARL] = 0,
   [OPCODE_CMP] = 0,
   [OPCODE_COS] = 0,
   [OPCODE_DDX] = nir_op_fddx,
   [OPCODE_DDY] = nir_op_fddy,
   [OPCODE_DP2] = 0,
   [OPCODE_DP3] = 0,
   [OPCODE_DP4] = 0,
   [OPCODE_DPH] = 0,
   [OPCODE_DST] = 0,
   [OPCODE_END] = 0,
   [OPCODE_EX2] = 0,
   [OPCODE_EXP] = 0,
   [OPCODE_FLR] = nir_op_ffloor,
   [OPCODE_FRC] = nir_op_ffract,
   [OPCODE_LG2] = 0,
   [OPCODE_LIT] = 0,
   [OPCODE_LOG] = 0,
   [OPCODE_LRP] = 0,
   [OPCODE_MAD] = 0,
   [OPCODE_MAX] = nir_op_fmax,
   [OPCODE_MIN] = nir_op_fmin,
   [OPCODE_MOV] = nir_op_fmov,
   [OPCODE_MUL] = nir_op_fmul,
   [OPCODE_POW] = 0,
   [OPCODE_RCP] = 0,

   [OPCODE_RSQ] = 0,
   [OPCODE_SCS] = 0,
   [OPCODE_SGE] = 0,
   [OPCODE_SIN] = 0,
   [OPCODE_SLT] = 0,
   [OPCODE_SSG] = nir_op_fsign,
   [OPCODE_SUB] = nir_op_fsub,
   [OPCODE_SWZ] = 0,
   [OPCODE_TEX] = 0,
   [OPCODE_TRUNC] = nir_op_ftrunc,
   [OPCODE_TXB] = 0,
   [OPCODE_TXD] = 0,
   [OPCODE_TXL] = 0,
   [OPCODE_TXP] = 0,
   [OPCODE_XPD] = 0,
};
    697 
/**
 * Translates one Mesa IR instruction into NIR.
 *
 * All three source slots and the destination are translated up front, then
 * the opcode is dispatched: opcodes with special semantics have dedicated
 * cases, and everything else goes through the op_trans table.  An opcode
 * with neither a case nor a table entry prints a diagnostic and aborts.
 * Saturation is applied afterwards as a separate read-modify-write of the
 * destination register.
 *
 * \param c          translation state
 * \param prog_inst  instruction to translate; OPCODE_END emits nothing
 */
static void
ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
{
   nir_builder *b = &c->build;
   unsigned i;
   const unsigned op = prog_inst->Opcode;

   if (op == OPCODE_END)
      return;

   /* Every source slot is fetched regardless of how many the opcode uses;
    * ptn_get_src() returns an immediate 0.0 for PROGRAM_UNDEFINED slots.
    */
   nir_ssa_def *src[3];
   for (i = 0; i < 3; i++) {
      src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
   }
   nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
   if (c->error)
      return;

   switch (op) {
   /* Scalar opcodes: operate on the .x channel of their source(s). */
   case OPCODE_RSQ:
      ptn_move_dest(b, dest,
                    nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
      break;

   case OPCODE_RCP:
      ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_EX2:
      ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_LG2:
      ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_POW:
      ptn_move_dest(b, dest, nir_fpow(b,
                                      ptn_channel(b, src[0], X),
                                      ptn_channel(b, src[1], X)));
      break;

   case OPCODE_COS:
      ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_SIN:
      ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_ARL:
      ptn_arl(b, dest, src);
      break;

   case OPCODE_EXP:
      ptn_exp(b, dest, src);
      break;

   case OPCODE_LOG:
      ptn_log(b, dest, src);
      break;

   case OPCODE_LRP:
      ptn_lrp(b, dest, src);
      break;

   case OPCODE_MAD:
      /* Emitted as a separate multiply and add. */
      ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]));
      break;

   case OPCODE_DST:
      ptn_dst(b, dest, src);
      break;

   case OPCODE_LIT:
      ptn_lit(b, dest, src);
      break;

   case OPCODE_XPD:
      ptn_xpd(b, dest, src);
      break;

   case OPCODE_DP2:
      ptn_dp2(b, dest, src);
      break;

   case OPCODE_DP3:
      ptn_dp3(b, dest, src);
      break;

   case OPCODE_DP4:
      ptn_dp4(b, dest, src);
      break;

   case OPCODE_DPH:
      ptn_dph(b, dest, src);
      break;

   case OPCODE_KIL:
      /* KIL has no destination; only the source is used. */
      ptn_kil(b, src);
      break;

   case OPCODE_CMP:
      ptn_cmp(b, dest, src);
      break;

   case OPCODE_SCS:
      ptn_scs(b, dest, src);
      break;

   case OPCODE_SLT:
      ptn_slt(b, dest, src);
      break;

   case OPCODE_SGE:
      ptn_sge(b, dest, src);
      break;

   case OPCODE_TEX:
   case OPCODE_TXB:
   case OPCODE_TXD:
   case OPCODE_TXL:
   case OPCODE_TXP:
      ptn_tex(b, dest, src, prog_inst);
      break;

   case OPCODE_SWZ:
      /* Extended swizzles were already handled in ptn_get_src(). */
      ptn_alu(b, nir_op_fmov, dest, src);
      break;

   case OPCODE_NOP:
      break;

   default:
      /* Opcodes with a direct ALU equivalent go through the table. */
      if (op_trans[op] != 0) {
         ptn_alu(b, op_trans[op], dest, src);
      } else {
         fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
         abort();
      }
      break;
   }

   if (prog_inst->Saturate) {
      /* NOTE(review): this assert repeats the enclosing condition and can
       * never fire.
       */
      assert(prog_inst->Saturate);
      assert(!dest.dest.is_ssa);
      /* Read the just-written register back, clamp it with fsat and write
       * it again under the same write mask.
       */
      ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
   }
}
    848 
    849 /**
    850  * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
    851  * variables at the end of the shader.
    852  *
    853  * We don't generate these incrementally as the PROGRAM_OUTPUT values are
    854  * written, because there's no output load intrinsic, which means we couldn't
    855  * handle writemasks.
    856  */
    857 static void
    858 ptn_add_output_stores(struct ptn_compile *c)
    859 {
    860    nir_builder *b = &c->build;
    861 
    862    nir_foreach_variable(var, &b->shader->outputs) {
    863       nir_intrinsic_instr *store =
    864          nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
    865       store->num_components = glsl_get_vector_elements(var->type);
    866       nir_intrinsic_set_write_mask(store, (1 << store->num_components) - 1);
    867       store->variables[0] =
    868          nir_deref_var_create(store, c->output_vars[var->data.location]);
    869 
    870       if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
    871           var->data.location == FRAG_RESULT_DEPTH) {
    872          /* result.depth has this strange convention of being the .z component of
    873           * a vec4 with undefined .xyw components.  We resolve it to a scalar, to
    874           * match GLSL's gl_FragDepth and the expectations of most backends.
    875           */
    876          nir_alu_src alu_src = { NIR_SRC_INIT };
    877          alu_src.src = nir_src_for_reg(c->output_regs[FRAG_RESULT_DEPTH]);
    878          alu_src.swizzle[0] = SWIZZLE_Z;
    879          store->src[0] = nir_src_for_ssa(nir_fmov_alu(b, alu_src, 1));
    880       } else {
    881          store->src[0].reg.reg = c->output_regs[var->data.location];
    882       }
    883       nir_builder_instr_insert(b, &store->instr);
    884    }
    885 }
    886 
    887 static void
    888 setup_registers_and_variables(struct ptn_compile *c)
    889 {
    890    nir_builder *b = &c->build;
    891    struct nir_shader *shader = b->shader;
    892 
    893    /* Create input variables. */
    894    const int num_inputs = util_last_bit64(c->prog->info.inputs_read);
    895    for (int i = 0; i < num_inputs; i++) {
    896       if (!(c->prog->info.inputs_read & BITFIELD64_BIT(i)))
    897          continue;
    898 
    899       nir_variable *var =
    900          nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
    901                              ralloc_asprintf(shader, "in_%d", i));
    902       var->data.location = i;
    903       var->data.index = 0;
    904 
    905       if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
    906          if (i == VARYING_SLOT_POS) {
    907             var->data.origin_upper_left = c->prog->OriginUpperLeft;
    908             var->data.pixel_center_integer = c->prog->PixelCenterInteger;
    909          } else if (i == VARYING_SLOT_FOGC) {
    910             /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
    911              * input variable a float, and create a local containing the
    912              * full vec4 value.
    913              */
    914             var->type = glsl_float_type();
    915 
    916             nir_intrinsic_instr *load_x =
    917                nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
    918             load_x->num_components = 1;
    919             load_x->variables[0] = nir_deref_var_create(load_x, var);
    920             nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, 32, NULL);
    921             nir_builder_instr_insert(b, &load_x->instr);
    922 
    923             nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
    924                                          nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));
    925 
    926             nir_variable *fullvar =
    927                nir_local_variable_create(b->impl, glsl_vec4_type(),
    928                                          "fogcoord_tmp");
    929             nir_intrinsic_instr *store =
    930                nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
    931             store->num_components = 4;
    932             nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
    933             store->variables[0] = nir_deref_var_create(store, fullvar);
    934             store->src[0] = nir_src_for_ssa(f001);
    935             nir_builder_instr_insert(b, &store->instr);
    936 
    937             /* We inserted the real input into the list so the driver has real
    938              * inputs, but we set c->input_vars[i] to the temporary so we use
    939              * the splatted value.
    940              */
    941             c->input_vars[i] = fullvar;
    942             continue;
    943          }
    944       }
    945 
    946       c->input_vars[i] = var;
    947    }
    948 
    949    /* Create output registers and variables. */
    950    int max_outputs = util_last_bit(c->prog->info.outputs_written);
    951    c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
    952 
    953    for (int i = 0; i < max_outputs; i++) {
    954       if (!(c->prog->info.outputs_written & BITFIELD64_BIT(i)))
    955          continue;
    956 
    957       /* Since we can't load from outputs in the IR, we make temporaries
    958        * for the outputs and emit stores to the real outputs at the end of
    959        * the shader.
    960        */
    961       nir_register *reg = nir_local_reg_create(b->impl);
    962       reg->num_components = 4;
    963 
    964       nir_variable *var = rzalloc(shader, nir_variable);
    965       if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH)
    966          var->type = glsl_float_type();
    967       else
    968          var->type = glsl_vec4_type();
    969       var->data.mode = nir_var_shader_out;
    970       var->name = ralloc_asprintf(var, "out_%d", i);
    971 
    972       var->data.location = i;
    973       var->data.index = 0;
    974 
    975       c->output_regs[i] = reg;
    976 
    977       exec_list_push_tail(&shader->outputs, &var->node);
    978       c->output_vars[i] = var;
    979    }
    980 
    981    /* Create temporary registers. */
    982    c->temp_regs = rzalloc_array(c, nir_register *,
    983                                 c->prog->arb.NumTemporaries);
    984 
    985    nir_register *reg;
    986    for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
    987       reg = nir_local_reg_create(b->impl);
    988       if (!reg) {
    989          c->error = true;
    990          return;
    991       }
    992       reg->num_components = 4;
    993       c->temp_regs[i] = reg;
    994    }
    995 
    996    /* Create the address register (for ARB_vertex_program). */
    997    reg = nir_local_reg_create(b->impl);
    998    if (!reg) {
    999       c->error = true;
   1000       return;
   1001    }
   1002    reg->num_components = 1;
   1003    c->addr_reg = reg;
   1004 }
   1005 
   1006 struct nir_shader *
   1007 prog_to_nir(const struct gl_program *prog,
   1008             const nir_shader_compiler_options *options)
   1009 {
   1010    struct ptn_compile *c;
   1011    struct nir_shader *s;
   1012    gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
   1013 
   1014    c = rzalloc(NULL, struct ptn_compile);
   1015    if (!c)
   1016       return NULL;
   1017    c->prog = prog;
   1018 
   1019    nir_builder_init_simple_shader(&c->build, NULL, stage, options);
   1020 
   1021    /* Use the shader_info from gl_program rather than the one nir_builder
   1022     * created for us. nir_sweep should clean up the other one for us.
   1023     */
   1024    c->build.shader->info = (shader_info *) &prog->info;
   1025 
   1026    s = c->build.shader;
   1027 
   1028    if (prog->Parameters->NumParameters > 0) {
   1029       c->parameters = rzalloc(s, nir_variable);
   1030       c->parameters->type =
   1031          glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters);
   1032       c->parameters->name = "parameters";
   1033       c->parameters->data.read_only = true;
   1034       c->parameters->data.mode = nir_var_uniform;
   1035       exec_list_push_tail(&s->uniforms, &c->parameters->node);
   1036    }
   1037 
   1038    setup_registers_and_variables(c);
   1039    if (unlikely(c->error))
   1040       goto fail;
   1041 
   1042    for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
   1043       ptn_emit_instruction(c, &prog->arb.Instructions[i]);
   1044 
   1045       if (unlikely(c->error))
   1046          break;
   1047    }
   1048 
   1049    ptn_add_output_stores(c);
   1050 
   1051    s->info->name = ralloc_asprintf(s, "ARB%d", prog->Id);
   1052    s->info->num_textures = util_last_bit(prog->SamplersUsed);
   1053    s->info->num_ubos = 0;
   1054    s->info->num_abos = 0;
   1055    s->info->num_ssbos = 0;
   1056    s->info->num_images = 0;
   1057    s->info->uses_texture_gather = false;
   1058    s->info->clip_distance_array_size = 0;
   1059    s->info->cull_distance_array_size = 0;
   1060    s->info->separate_shader = false;
   1061 
   1062 fail:
   1063    if (c->error) {
   1064       ralloc_free(s);
   1065       s = NULL;
   1066    }
   1067    ralloc_free(c);
   1068    return s;
   1069 }
   1070