Home | History | Annotate | Download | only in program
      1 /*
      2  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
      3  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
      4  * Copyright  2010 Intel Corporation
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the next
     14  * paragraph) shall be included in all copies or substantial portions of the
     15  * Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     23  * DEALINGS IN THE SOFTWARE.
     24  */
     25 
     26 /**
     27  * \file ir_to_mesa.cpp
     28  *
     29  * Translate GLSL IR to Mesa's gl_program representation.
     30  */
     31 
     32 #include <stdio.h>
     33 #include "main/compiler.h"
     34 #include "ir.h"
     35 #include "ir_visitor.h"
     36 #include "ir_print_visitor.h"
     37 #include "ir_expression_flattening.h"
     38 #include "ir_uniform.h"
     39 #include "glsl_types.h"
     40 #include "glsl_parser_extras.h"
     41 #include "../glsl/program.h"
     42 #include "ir_optimization.h"
     43 #include "ast.h"
     44 #include "linker.h"
     45 
     46 #include "main/mtypes.h"
     47 #include "main/shaderobj.h"
     48 #include "program/hash_table.h"
     49 
     50 extern "C" {
     51 #include "main/shaderapi.h"
     52 #include "main/uniforms.h"
     53 #include "program/prog_instruction.h"
     54 #include "program/prog_optimize.h"
     55 #include "program/prog_print.h"
     56 #include "program/program.h"
     57 #include "program/prog_parameter.h"
     58 #include "program/sampler.h"
     59 }
     60 
     61 class src_reg;
     62 class dst_reg;
     63 
     64 static int swizzle_for_size(int size);
     65 
     66 /**
     67  * This struct is a corresponding struct to Mesa prog_src_register, with
     68  * wider fields.
     69  */
     70 class src_reg {
     71 public:
     72    src_reg(gl_register_file file, int index, const glsl_type *type)
     73    {
     74       this->file = file;
     75       this->index = index;
     76       if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
     77 	 this->swizzle = swizzle_for_size(type->vector_elements);
     78       else
     79 	 this->swizzle = SWIZZLE_XYZW;
     80       this->negate = 0;
     81       this->reladdr = NULL;
     82    }
     83 
     84    src_reg()
     85    {
     86       this->file = PROGRAM_UNDEFINED;
     87       this->index = 0;
     88       this->swizzle = 0;
     89       this->negate = 0;
     90       this->reladdr = NULL;
     91    }
     92 
     93    explicit src_reg(dst_reg reg);
     94 
     95    gl_register_file file; /**< PROGRAM_* from Mesa */
     96    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
     97    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
     98    int negate; /**< NEGATE_XYZW mask from mesa */
     99    /** Register index should be offset by the integer in this reg. */
    100    src_reg *reladdr;
    101 };
    102 
    103 class dst_reg {
    104 public:
    105    dst_reg(gl_register_file file, int writemask)
    106    {
    107       this->file = file;
    108       this->index = 0;
    109       this->writemask = writemask;
    110       this->cond_mask = COND_TR;
    111       this->reladdr = NULL;
    112    }
    113 
    114    dst_reg()
    115    {
    116       this->file = PROGRAM_UNDEFINED;
    117       this->index = 0;
    118       this->writemask = 0;
    119       this->cond_mask = COND_TR;
    120       this->reladdr = NULL;
    121    }
    122 
    123    explicit dst_reg(src_reg reg);
    124 
    125    gl_register_file file; /**< PROGRAM_* from Mesa */
    126    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
    127    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
    128    GLuint cond_mask:4;
    129    /** Register index should be offset by the integer in this reg. */
    130    src_reg *reladdr;
    131 };
    132 
    133 src_reg::src_reg(dst_reg reg)
    134 {
    135    this->file = reg.file;
    136    this->index = reg.index;
    137    this->swizzle = SWIZZLE_XYZW;
    138    this->negate = 0;
    139    this->reladdr = reg.reladdr;
    140 }
    141 
    142 dst_reg::dst_reg(src_reg reg)
    143 {
    144    this->file = reg.file;
    145    this->index = reg.index;
    146    this->writemask = WRITEMASK_XYZW;
    147    this->cond_mask = COND_TR;
    148    this->reladdr = reg.reladdr;
    149 }
    150 
    151 class ir_to_mesa_instruction : public exec_node {
    152 public:
    153    /* Callers of this ralloc-based new need not call delete. It's
    154     * easier to just ralloc_free 'ctx' (or any of its ancestors). */
    155    static void* operator new(size_t size, void *ctx)
    156    {
    157       void *node;
    158 
    159       node = rzalloc_size(ctx, size);
    160       assert(node != NULL);
    161 
    162       return node;
    163    }
    164 
    165    enum prog_opcode op;
    166    dst_reg dst;
    167    src_reg src[3];
    168    /** Pointer to the ir source this tree came from for debugging */
    169    ir_instruction *ir;
    170    GLboolean cond_update;
    171    bool saturate;
    172    int sampler; /**< sampler index */
    173    int tex_target; /**< One of TEXTURE_*_INDEX */
    174    GLboolean tex_shadow;
    175 };
    176 
    177 class variable_storage : public exec_node {
    178 public:
    179    variable_storage(ir_variable *var, gl_register_file file, int index)
    180       : file(file), index(index), var(var)
    181    {
    182       /* empty */
    183    }
    184 
    185    gl_register_file file;
    186    int index;
    187    ir_variable *var; /* variable that maps to this, if any */
    188 };
    189 
/**
 * Bookkeeping for one function signature known to the visitor.
 *
 * Entries are kept in the visitor's list of function signatures.
 */
class function_entry : public exec_node {
public:
   /** The IR signature this entry describes. */
   ir_function_signature *sig;

   /**
    * identifier of this function signature used by the program.
    *
    * At the point that Mesa instructions for function calls are
    * generated, we don't know the address of the first instruction of
    * the function body.  So we make the BranchTarget that is called a
    * small integer and rewrite them during set_branchtargets().
    */
   int sig_id;

   /**
    * Pointer to first instruction of the function body.
    *
    * Set during function body emits after main() is processed.
    */
   ir_to_mesa_instruction *bgn_inst;

   /**
    * Index of the first instruction of the function body in actual
    * Mesa IR.
    *
    * Set after conversion from ir_to_mesa_instruction to prog_instruction.
    */
   int inst;

   /** Storage for the return value. */
   src_reg return_reg;
};
    222 
/**
 * IR visitor that translates GLSL IR into a list of ir_to_mesa_instruction.
 *
 * Each visit method appends to \c instructions through the emit() helpers;
 * rvalue visits hand their value back to the caller through \c result.
 */
class ir_to_mesa_visitor : public ir_visitor {
public:
   ir_to_mesa_visitor();
   ~ir_to_mesa_visitor();

   function_entry *current_function;

   struct gl_context *ctx;
   /** Program being generated; its Target and Parameters are consulted. */
   struct gl_program *prog;
   /** Shader program that linker errors are reported against. */
   struct gl_shader_program *shader_program;
   struct gl_shader_compiler_options *options;

   /** Index that the next PROGRAM_TEMPORARY allocation will start at. */
   int next_temp;

   /** Look up the storage recorded for \c var; NULL when none exists. */
   variable_storage *find_variable_storage(ir_variable *var);

   /** Reserve temporary register space large enough for \c type. */
   src_reg get_temp(const glsl_type *type);
   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);

   /** Return a PROGRAM_CONSTANT source holding \c val. */
   src_reg src_reg_for_float(float val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable  *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   /*@}*/

   /** Register holding the value of the most recently visited rvalue. */
   src_reg result;

   /** List of variable_storage */
   exec_list variables;

   /** List of function_entry */
   exec_list function_signatures;
   int next_signature_id;

   /** List of ir_to_mesa_instruction */
   exec_list instructions;

   /** Emit \c op with undefined destination and sources. */
   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op);

   /** Emit \c op with one source; the destination writemask must be non-zero. */
   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
			        dst_reg dst, src_reg src0);

   /** Emit \c op with two sources. */
   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
			        dst_reg dst, src_reg src0, src_reg src1);

   /** Emit \c op with three sources; the other overloads forward here. */
   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
			        dst_reg dst,
			        src_reg src0, src_reg src1, src_reg src2);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
				    dst_reg dst,
				    src_reg src0,
				    src_reg src1,
				    unsigned elements);

   /** Emit a scalar-only opcode once per distinct source channel. */
   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
		    dst_reg dst, src_reg src0);

   /** Two-source variant of emit_scalar(). */
   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
		    dst_reg dst, src_reg src0, src_reg src1);

   /** Emit sine or cosine using the SCS opcode where it is available. */
   void emit_scs(ir_instruction *ir, enum prog_opcode op,
		 dst_reg dst, const src_reg &src);

   /** Try to emit a single MAD when operand \c mul_operand is a multiply. */
   bool try_emit_mad(ir_expression *ir,
			  int mul_operand);
   /** Try to emit MAD(a, -b, a) for the expression AND(a, NOT(b)). */
   bool try_emit_mad_for_and_not(ir_expression *ir,
				 int mul_operand);
   bool try_emit_sat(ir_expression *ir);

   void emit_swz(ir_expression *ir);

   bool process_move_condition(ir_rvalue *ir);

   void copy_propagate(void);

   /** ralloc context that instructions and helper nodes are allocated from. */
   void *mem_ctx;
};
    327 
/** Placeholder for an unused source operand (PROGRAM_UNDEFINED, index 0). */
src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL);

/**
 * Placeholder for an unused destination operand.
 *
 * NOTE(review): the second dst_reg constructor argument is a writemask, yet a
 * swizzle constant is passed here -- confirm this is intended.
 */
dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);

/** X channel of the PROGRAM_ADDRESS file; the destination of emitted ARLs. */
dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
    333 
    334 static int
    335 swizzle_for_size(int size)
    336 {
    337    static const int size_swizzles[4] = {
    338       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
    339       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
    340       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
    341       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
    342    };
    343 
    344    assert((size >= 1) && (size <= 4));
    345    return size_swizzles[size - 1];
    346 }
    347 
    348 ir_to_mesa_instruction *
    349 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
    350 			 dst_reg dst,
    351 			 src_reg src0, src_reg src1, src_reg src2)
    352 {
    353    ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
    354    int num_reladdr = 0;
    355 
    356    /* If we have to do relative addressing, we want to load the ARL
    357     * reg directly for one of the regs, and preload the other reladdr
    358     * sources into temps.
    359     */
    360    num_reladdr += dst.reladdr != NULL;
    361    num_reladdr += src0.reladdr != NULL;
    362    num_reladdr += src1.reladdr != NULL;
    363    num_reladdr += src2.reladdr != NULL;
    364 
    365    reladdr_to_temp(ir, &src2, &num_reladdr);
    366    reladdr_to_temp(ir, &src1, &num_reladdr);
    367    reladdr_to_temp(ir, &src0, &num_reladdr);
    368 
    369    if (dst.reladdr) {
    370       emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
    371       num_reladdr--;
    372    }
    373    assert(num_reladdr == 0);
    374 
    375    inst->op = op;
    376    inst->dst = dst;
    377    inst->src[0] = src0;
    378    inst->src[1] = src1;
    379    inst->src[2] = src2;
    380    inst->ir = ir;
    381 
    382    this->instructions.push_tail(inst);
    383 
    384    return inst;
    385 }
    386 
    387 
    388 ir_to_mesa_instruction *
    389 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
    390 			 dst_reg dst, src_reg src0, src_reg src1)
    391 {
    392    return emit(ir, op, dst, src0, src1, undef_src);
    393 }
    394 
    395 ir_to_mesa_instruction *
    396 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
    397 			 dst_reg dst, src_reg src0)
    398 {
    399    assert(dst.writemask != 0);
    400    return emit(ir, op, dst, src0, undef_src, undef_src);
    401 }
    402 
    403 ir_to_mesa_instruction *
    404 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
    405 {
    406    return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
    407 }
    408 
    409 ir_to_mesa_instruction *
    410 ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
    411 			    dst_reg dst, src_reg src0, src_reg src1,
    412 			    unsigned elements)
    413 {
    414    static const gl_inst_opcode dot_opcodes[] = {
    415       OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
    416    };
    417 
    418    return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
    419 }
    420 
    421 /**
    422  * Emits Mesa scalar opcodes to produce unique answers across channels.
    423  *
    424  * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
    425  * channel determines the result across all channels.  So to do a vec4
    426  * of this operation, we want to emit a scalar per source channel used
    427  * to produce dest channels.
    428  */
    429 void
    430 ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
    431 			        dst_reg dst,
    432 				src_reg orig_src0, src_reg orig_src1)
    433 {
    434    int i, j;
    435    int done_mask = ~dst.writemask;
    436 
    437    /* Mesa RCP is a scalar operation splatting results to all channels,
    438     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
    439     * dst channels.
    440     */
    441    for (i = 0; i < 4; i++) {
    442       GLuint this_mask = (1 << i);
    443       ir_to_mesa_instruction *inst;
    444       src_reg src0 = orig_src0;
    445       src_reg src1 = orig_src1;
    446 
    447       if (done_mask & this_mask)
    448 	 continue;
    449 
    450       GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
    451       GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
    452       for (j = i + 1; j < 4; j++) {
    453 	 /* If there is another enabled component in the destination that is
    454 	  * derived from the same inputs, generate its value on this pass as
    455 	  * well.
    456 	  */
    457 	 if (!(done_mask & (1 << j)) &&
    458 	     GET_SWZ(src0.swizzle, j) == src0_swiz &&
    459 	     GET_SWZ(src1.swizzle, j) == src1_swiz) {
    460 	    this_mask |= (1 << j);
    461 	 }
    462       }
    463       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
    464 				   src0_swiz, src0_swiz);
    465       src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
    466 				  src1_swiz, src1_swiz);
    467 
    468       inst = emit(ir, op, dst, src0, src1);
    469       inst->dst.writemask = this_mask;
    470       done_mask |= this_mask;
    471    }
    472 }
    473 
    474 void
    475 ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
    476 			        dst_reg dst, src_reg src0)
    477 {
    478    src_reg undef = undef_src;
    479 
    480    undef.swizzle = SWIZZLE_XXXX;
    481 
    482    emit_scalar(ir, op, dst, src0, undef);
    483 }
    484 
    485 /**
    486  * Emit an OPCODE_SCS instruction
    487  *
    488  * The \c SCS opcode functions a bit differently than the other Mesa (or
    489  * ARB_fragment_program) opcodes.  Instead of splatting its result across all
    490  * four components of the destination, it writes one value to the \c x
    491  * component and another value to the \c y component.
    492  *
    493  * \param ir        IR instruction being processed
    494  * \param op        Either \c OPCODE_SIN or \c OPCODE_COS depending on which
    495  *                  value is desired.
    496  * \param dst       Destination register
    497  * \param src       Source register
    498  */
    499 void
    500 ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
    501 			     dst_reg dst,
    502 			     const src_reg &src)
    503 {
    504    /* Vertex programs cannot use the SCS opcode.
    505     */
    506    if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
    507       emit_scalar(ir, op, dst, src);
    508       return;
    509    }
    510 
    511    const unsigned component = (op == OPCODE_SIN) ? 0 : 1;
    512    const unsigned scs_mask = (1U << component);
    513    int done_mask = ~dst.writemask;
    514    src_reg tmp;
    515 
    516    assert(op == OPCODE_SIN || op == OPCODE_COS);
    517 
    518    /* If there are compnents in the destination that differ from the component
    519     * that will be written by the SCS instrution, we'll need a temporary.
    520     */
    521    if (scs_mask != unsigned(dst.writemask)) {
    522       tmp = get_temp(glsl_type::vec4_type);
    523    }
    524 
    525    for (unsigned i = 0; i < 4; i++) {
    526       unsigned this_mask = (1U << i);
    527       src_reg src0 = src;
    528 
    529       if ((done_mask & this_mask) != 0)
    530 	 continue;
    531 
    532       /* The source swizzle specified which component of the source generates
    533        * sine / cosine for the current component in the destination.  The SCS
    534        * instruction requires that this value be swizzle to the X component.
    535        * Replace the current swizzle with a swizzle that puts the source in
    536        * the X component.
    537        */
    538       unsigned src0_swiz = GET_SWZ(src.swizzle, i);
    539 
    540       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
    541 				   src0_swiz, src0_swiz);
    542       for (unsigned j = i + 1; j < 4; j++) {
    543 	 /* If there is another enabled component in the destination that is
    544 	  * derived from the same inputs, generate its value on this pass as
    545 	  * well.
    546 	  */
    547 	 if (!(done_mask & (1 << j)) &&
    548 	     GET_SWZ(src0.swizzle, j) == src0_swiz) {
    549 	    this_mask |= (1 << j);
    550 	 }
    551       }
    552 
    553       if (this_mask != scs_mask) {
    554 	 ir_to_mesa_instruction *inst;
    555 	 dst_reg tmp_dst = dst_reg(tmp);
    556 
    557 	 /* Emit the SCS instruction.
    558 	  */
    559 	 inst = emit(ir, OPCODE_SCS, tmp_dst, src0);
    560 	 inst->dst.writemask = scs_mask;
    561 
    562 	 /* Move the result of the SCS instruction to the desired location in
    563 	  * the destination.
    564 	  */
    565 	 tmp.swizzle = MAKE_SWIZZLE4(component, component,
    566 				     component, component);
    567 	 inst = emit(ir, OPCODE_SCS, dst, tmp);
    568 	 inst->dst.writemask = this_mask;
    569       } else {
    570 	 /* Emit the SCS instruction to write directly to the destination.
    571 	  */
    572 	 ir_to_mesa_instruction *inst = emit(ir, OPCODE_SCS, dst, src0);
    573 	 inst->dst.writemask = scs_mask;
    574       }
    575 
    576       done_mask |= this_mask;
    577    }
    578 }
    579 
    580 src_reg
    581 ir_to_mesa_visitor::src_reg_for_float(float val)
    582 {
    583    src_reg src(PROGRAM_CONSTANT, -1, NULL);
    584 
    585    src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
    586 					  (const gl_constant_value *)&val, 1, &src.swizzle);
    587 
    588    return src;
    589 }
    590 
    591 static int
    592 type_size(const struct glsl_type *type)
    593 {
    594    unsigned int i;
    595    int size;
    596 
    597    switch (type->base_type) {
    598    case GLSL_TYPE_UINT:
    599    case GLSL_TYPE_INT:
    600    case GLSL_TYPE_FLOAT:
    601    case GLSL_TYPE_BOOL:
    602       if (type->is_matrix()) {
    603 	 return type->matrix_columns;
    604       } else {
    605 	 /* Regardless of size of vector, it gets a vec4. This is bad
    606 	  * packing for things like floats, but otherwise arrays become a
    607 	  * mess.  Hopefully a later pass over the code can pack scalars
    608 	  * down if appropriate.
    609 	  */
    610 	 return 1;
    611       }
    612    case GLSL_TYPE_ARRAY:
    613       assert(type->length > 0);
    614       return type_size(type->fields.array) * type->length;
    615    case GLSL_TYPE_STRUCT:
    616       size = 0;
    617       for (i = 0; i < type->length; i++) {
    618 	 size += type_size(type->fields.structure[i].type);
    619       }
    620       return size;
    621    case GLSL_TYPE_SAMPLER:
    622       /* Samplers take up one slot in UNIFORMS[], but they're baked in
    623        * at link time.
    624        */
    625       return 1;
    626    default:
    627       assert(0);
    628       return 0;
    629    }
    630 }
    631 
    632 /**
    633  * In the initial pass of codegen, we assign temporary numbers to
    634  * intermediate results.  (not SSA -- variable assignments will reuse
    635  * storage).  Actual register allocation for the Mesa VM occurs in a
    636  * pass over the Mesa IR later.
    637  */
    638 src_reg
    639 ir_to_mesa_visitor::get_temp(const glsl_type *type)
    640 {
    641    src_reg src;
    642 
    643    src.file = PROGRAM_TEMPORARY;
    644    src.index = next_temp;
    645    src.reladdr = NULL;
    646    next_temp += type_size(type);
    647 
    648    if (type->is_array() || type->is_record()) {
    649       src.swizzle = SWIZZLE_NOOP;
    650    } else {
    651       src.swizzle = swizzle_for_size(type->vector_elements);
    652    }
    653    src.negate = 0;
    654 
    655    return src;
    656 }
    657 
    658 variable_storage *
    659 ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
    660 {
    661 
    662    variable_storage *entry;
    663 
    664    foreach_iter(exec_list_iterator, iter, this->variables) {
    665       entry = (variable_storage *)iter.get();
    666 
    667       if (entry->var == var)
    668 	 return entry;
    669    }
    670 
    671    return NULL;
    672 }
    673 
    674 void
    675 ir_to_mesa_visitor::visit(ir_variable *ir)
    676 {
    677    if (strcmp(ir->name, "gl_FragCoord") == 0) {
    678       struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
    679 
    680       fp->OriginUpperLeft = ir->origin_upper_left;
    681       fp->PixelCenterInteger = ir->pixel_center_integer;
    682    }
    683 
    684    if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
    685       unsigned int i;
    686       const ir_state_slot *const slots = ir->state_slots;
    687       assert(ir->state_slots != NULL);
    688 
    689       /* Check if this statevar's setup in the STATE file exactly
    690        * matches how we'll want to reference it as a
    691        * struct/array/whatever.  If not, then we need to move it into
    692        * temporary storage and hope that it'll get copy-propagated
    693        * out.
    694        */
    695       for (i = 0; i < ir->num_state_slots; i++) {
    696 	 if (slots[i].swizzle != SWIZZLE_XYZW) {
    697 	    break;
    698 	 }
    699       }
    700 
    701       variable_storage *storage;
    702       dst_reg dst;
    703       if (i == ir->num_state_slots) {
    704 	 /* We'll set the index later. */
    705 	 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
    706 	 this->variables.push_tail(storage);
    707 
    708 	 dst = undef_dst;
    709       } else {
    710 	 /* The variable_storage constructor allocates slots based on the size
    711 	  * of the type.  However, this had better match the number of state
    712 	  * elements that we're going to copy into the new temporary.
    713 	  */
    714 	 assert((int) ir->num_state_slots == type_size(ir->type));
    715 
    716 	 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
    717 						 this->next_temp);
    718 	 this->variables.push_tail(storage);
    719 	 this->next_temp += type_size(ir->type);
    720 
    721 	 dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
    722       }
    723 
    724 
    725       for (unsigned int i = 0; i < ir->num_state_slots; i++) {
    726 	 int index = _mesa_add_state_reference(this->prog->Parameters,
    727 					       (gl_state_index *)slots[i].tokens);
    728 
    729 	 if (storage->file == PROGRAM_STATE_VAR) {
    730 	    if (storage->index == -1) {
    731 	       storage->index = index;
    732 	    } else {
    733 	       assert(index == storage->index + (int)i);
    734 	    }
    735 	 } else {
    736 	    src_reg src(PROGRAM_STATE_VAR, index, NULL);
    737 	    src.swizzle = slots[i].swizzle;
    738 	    emit(ir, OPCODE_MOV, dst, src);
    739 	    /* even a float takes up a whole vec4 reg in a struct/array. */
    740 	    dst.index++;
    741 	 }
    742       }
    743 
    744       if (storage->file == PROGRAM_TEMPORARY &&
    745 	  dst.index != storage->index + (int) ir->num_state_slots) {
    746 	 linker_error(this->shader_program,
    747 		      "failed to load builtin uniform `%s' "
    748 		      "(%d/%d regs loaded)\n",
    749 		      ir->name, dst.index - storage->index,
    750 		      type_size(ir->type));
    751       }
    752    }
    753 }
    754 
    755 void
    756 ir_to_mesa_visitor::visit(ir_loop *ir)
    757 {
    758    ir_dereference_variable *counter = NULL;
    759 
    760    if (ir->counter != NULL)
    761       counter = new(mem_ctx) ir_dereference_variable(ir->counter);
    762 
    763    if (ir->from != NULL) {
    764       assert(ir->counter != NULL);
    765 
    766       ir_assignment *a =
    767 	new(mem_ctx) ir_assignment(counter, ir->from, NULL);
    768 
    769       a->accept(this);
    770    }
    771 
    772    emit(NULL, OPCODE_BGNLOOP);
    773 
    774    if (ir->to) {
    775       ir_expression *e =
    776 	 new(mem_ctx) ir_expression(ir->cmp, glsl_type::bool_type,
    777 					  counter, ir->to);
    778       ir_if *if_stmt =  new(mem_ctx) ir_if(e);
    779 
    780       ir_loop_jump *brk =
    781 	new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break);
    782 
    783       if_stmt->then_instructions.push_tail(brk);
    784 
    785       if_stmt->accept(this);
    786    }
    787 
    788    visit_exec_list(&ir->body_instructions, this);
    789 
    790    if (ir->increment) {
    791       ir_expression *e =
    792 	 new(mem_ctx) ir_expression(ir_binop_add, counter->type,
    793 					  counter, ir->increment);
    794 
    795       ir_assignment *a =
    796 	new(mem_ctx) ir_assignment(counter, e, NULL);
    797 
    798       a->accept(this);
    799    }
    800 
    801    emit(NULL, OPCODE_ENDLOOP);
    802 }
    803 
    804 void
    805 ir_to_mesa_visitor::visit(ir_loop_jump *ir)
    806 {
    807    switch (ir->mode) {
    808    case ir_loop_jump::jump_break:
    809       emit(NULL, OPCODE_BRK);
    810       break;
    811    case ir_loop_jump::jump_continue:
    812       emit(NULL, OPCODE_CONT);
    813       break;
    814    }
    815 }
    816 
    817 
    818 void
    819 ir_to_mesa_visitor::visit(ir_function_signature *ir)
    820 {
    821    assert(0);
    822    (void)ir;
    823 }
    824 
    825 void
    826 ir_to_mesa_visitor::visit(ir_function *ir)
    827 {
    828    /* Ignore function bodies other than main() -- we shouldn't see calls to
    829     * them since they should all be inlined before we get to ir_to_mesa.
    830     */
    831    if (strcmp(ir->name, "main") == 0) {
    832       const ir_function_signature *sig;
    833       exec_list empty;
    834 
    835       sig = ir->matching_signature(&empty);
    836 
    837       assert(sig);
    838 
    839       foreach_iter(exec_list_iterator, iter, sig->body) {
    840 	 ir_instruction *ir = (ir_instruction *)iter.get();
    841 
    842 	 ir->accept(this);
    843       }
    844    }
    845 }
    846 
    847 bool
    848 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
    849 {
    850    int nonmul_operand = 1 - mul_operand;
    851    src_reg a, b, c;
    852 
    853    ir_expression *expr = ir->operands[mul_operand]->as_expression();
    854    if (!expr || expr->operation != ir_binop_mul)
    855       return false;
    856 
    857    expr->operands[0]->accept(this);
    858    a = this->result;
    859    expr->operands[1]->accept(this);
    860    b = this->result;
    861    ir->operands[nonmul_operand]->accept(this);
    862    c = this->result;
    863 
    864    this->result = get_temp(ir->type);
    865    emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c);
    866 
    867    return true;
    868 }
    869 
    870 /**
    871  * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
    872  *
    873  * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
    874  * implemented using multiplication, and logical-or is implemented using
    875  * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
    876  * As result, the logical expression (a & !b) can be rewritten as:
    877  *
    878  *     - a * !b
    879  *     - a * (1 - b)
    880  *     - (a * 1) - (a * b)
    881  *     - a + -(a * b)
    882  *     - a + (a * -b)
    883  *
    884  * This final expression can be implemented as a single MAD(a, -b, a)
    885  * instruction.
    886  */
    887 bool
    888 ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
    889 {
    890    const int other_operand = 1 - try_operand;
    891    src_reg a, b;
    892 
    893    ir_expression *expr = ir->operands[try_operand]->as_expression();
    894    if (!expr || expr->operation != ir_unop_logic_not)
    895       return false;
    896 
    897    ir->operands[other_operand]->accept(this);
    898    a = this->result;
    899    expr->operands[0]->accept(this);
    900    b = this->result;
    901 
    902    b.negate = ~b.negate;
    903 
    904    this->result = get_temp(ir->type);
    905    emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a);
    906 
    907    return true;
    908 }
    909 
    910 bool
    911 ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
    912 {
    913    /* Saturates were only introduced to vertex programs in
    914     * NV_vertex_program3, so don't give them to drivers in the VP.
    915     */
    916    if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
    917       return false;
    918 
    919    ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
    920    if (!sat_src)
    921       return false;
    922 
    923    sat_src->accept(this);
    924    src_reg src = this->result;
    925 
    926    /* If we generated an expression instruction into a temporary in
    927     * processing the saturate's operand, apply the saturate to that
    928     * instruction.  Otherwise, generate a MOV to do the saturate.
    929     *
    930     * Note that we have to be careful to only do this optimization if
    931     * the instruction in question was what generated src->result.  For
    932     * example, ir_dereference_array might generate a MUL instruction
    933     * to create the reladdr, and return us a src reg using that
    934     * reladdr.  That MUL result is not the value we're trying to
    935     * saturate.
    936     */
    937    ir_expression *sat_src_expr = sat_src->as_expression();
    938    ir_to_mesa_instruction *new_inst;
    939    new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
    940    if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
    941 			sat_src_expr->operation == ir_binop_add ||
    942 			sat_src_expr->operation == ir_binop_dot)) {
    943       new_inst->saturate = true;
    944    } else {
    945       this->result = get_temp(ir->type);
    946       ir_to_mesa_instruction *inst;
    947       inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
    948       inst->saturate = true;
    949    }
    950 
    951    return true;
    952 }
    953 
    954 void
    955 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
    956 				    src_reg *reg, int *num_reladdr)
    957 {
    958    if (!reg->reladdr)
    959       return;
    960 
    961    emit(ir, OPCODE_ARL, address_reg, *reg->reladdr);
    962 
    963    if (*num_reladdr != 1) {
    964       src_reg temp = get_temp(glsl_type::vec4_type);
    965 
    966       emit(ir, OPCODE_MOV, dst_reg(temp), *reg);
    967       *reg = temp;
    968    }
    969 
    970    (*num_reladdr)--;
    971 }
    972 
    973 void
    974 ir_to_mesa_visitor::emit_swz(ir_expression *ir)
    975 {
    976    /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
    977     * This means that each of the operands is either an immediate value of -1,
    978     * 0, or 1, or is a component from one source register (possibly with
    979     * negation).
    980     */
    981    uint8_t components[4] = { 0 };
    982    bool negate[4] = { false };
    983    ir_variable *var = NULL;
    984 
    985    for (unsigned i = 0; i < ir->type->vector_elements; i++) {
    986       ir_rvalue *op = ir->operands[i];
    987 
    988       assert(op->type->is_scalar());
    989 
    990       while (op != NULL) {
    991 	 switch (op->ir_type) {
    992 	 case ir_type_constant: {
    993 
    994 	    assert(op->type->is_scalar());
    995 
    996 	    const ir_constant *const c = op->as_constant();
    997 	    if (c->is_one()) {
    998 	       components[i] = SWIZZLE_ONE;
    999 	    } else if (c->is_zero()) {
   1000 	       components[i] = SWIZZLE_ZERO;
   1001 	    } else if (c->is_negative_one()) {
   1002 	       components[i] = SWIZZLE_ONE;
   1003 	       negate[i] = true;
   1004 	    } else {
   1005 	       assert(!"SWZ constant must be 0.0 or 1.0.");
   1006 	    }
   1007 
   1008 	    op = NULL;
   1009 	    break;
   1010 	 }
   1011 
   1012 	 case ir_type_dereference_variable: {
   1013 	    ir_dereference_variable *const deref =
   1014 	       (ir_dereference_variable *) op;
   1015 
   1016 	    assert((var == NULL) || (deref->var == var));
   1017 	    components[i] = SWIZZLE_X;
   1018 	    var = deref->var;
   1019 	    op = NULL;
   1020 	    break;
   1021 	 }
   1022 
   1023 	 case ir_type_expression: {
   1024 	    ir_expression *const expr = (ir_expression *) op;
   1025 
   1026 	    assert(expr->operation == ir_unop_neg);
   1027 	    negate[i] = true;
   1028 
   1029 	    op = expr->operands[0];
   1030 	    break;
   1031 	 }
   1032 
   1033 	 case ir_type_swizzle: {
   1034 	    ir_swizzle *const swiz = (ir_swizzle *) op;
   1035 
   1036 	    components[i] = swiz->mask.x;
   1037 	    op = swiz->val;
   1038 	    break;
   1039 	 }
   1040 
   1041 	 default:
   1042 	    assert(!"Should not get here.");
   1043 	    return;
   1044 	 }
   1045       }
   1046    }
   1047 
   1048    assert(var != NULL);
   1049 
   1050    ir_dereference_variable *const deref =
   1051       new(mem_ctx) ir_dereference_variable(var);
   1052 
   1053    this->result.file = PROGRAM_UNDEFINED;
   1054    deref->accept(this);
   1055    if (this->result.file == PROGRAM_UNDEFINED) {
   1056       ir_print_visitor v;
   1057       printf("Failed to get tree for expression operand:\n");
   1058       deref->accept(&v);
   1059       exit(1);
   1060    }
   1061 
   1062    src_reg src;
   1063 
   1064    src = this->result;
   1065    src.swizzle = MAKE_SWIZZLE4(components[0],
   1066 			       components[1],
   1067 			       components[2],
   1068 			       components[3]);
   1069    src.negate = ((unsigned(negate[0]) << 0)
   1070 		 | (unsigned(negate[1]) << 1)
   1071 		 | (unsigned(negate[2]) << 2)
   1072 		 | (unsigned(negate[3]) << 3));
   1073 
   1074    /* Storage for our result.  Ideally for an assignment we'd be using the
   1075     * actual storage for the result here, instead.
   1076     */
   1077    const src_reg result_src = get_temp(ir->type);
   1078    dst_reg result_dst = dst_reg(result_src);
   1079 
   1080    /* Limit writes to the channels that will be used by result_src later.
   1081     * This does limit this temp's use as a temporary for multi-instruction
   1082     * sequences.
   1083     */
   1084    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
   1085 
   1086    emit(ir, OPCODE_SWZ, result_dst, src);
   1087    this->result = result_src;
   1088 }
   1089 
   1090 void
   1091 ir_to_mesa_visitor::visit(ir_expression *ir)
   1092 {
   1093    unsigned int operand;
   1094    src_reg op[Elements(ir->operands)];
   1095    src_reg result_src;
   1096    dst_reg result_dst;
   1097 
   1098    /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
   1099     */
   1100    if (ir->operation == ir_binop_add) {
   1101       if (try_emit_mad(ir, 1))
   1102 	 return;
   1103       if (try_emit_mad(ir, 0))
   1104 	 return;
   1105    }
   1106 
   1107    /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
   1108     */
   1109    if (ir->operation == ir_binop_logic_and) {
   1110       if (try_emit_mad_for_and_not(ir, 1))
   1111 	 return;
   1112       if (try_emit_mad_for_and_not(ir, 0))
   1113 	 return;
   1114    }
   1115 
   1116    if (try_emit_sat(ir))
   1117       return;
   1118 
   1119    if (ir->operation == ir_quadop_vector) {
   1120       this->emit_swz(ir);
   1121       return;
   1122    }
   1123 
   1124    for (operand = 0; operand < ir->get_num_operands(); operand++) {
   1125       this->result.file = PROGRAM_UNDEFINED;
   1126       ir->operands[operand]->accept(this);
   1127       if (this->result.file == PROGRAM_UNDEFINED) {
   1128 	 ir_print_visitor v;
   1129 	 printf("Failed to get tree for expression operand:\n");
   1130 	 ir->operands[operand]->accept(&v);
   1131 	 exit(1);
   1132       }
   1133       op[operand] = this->result;
   1134 
   1135       /* Matrix expression operands should have been broken down to vector
   1136        * operations already.
   1137        */
   1138       assert(!ir->operands[operand]->type->is_matrix());
   1139    }
   1140 
   1141    int vector_elements = ir->operands[0]->type->vector_elements;
   1142    if (ir->operands[1]) {
   1143       vector_elements = MAX2(vector_elements,
   1144 			     ir->operands[1]->type->vector_elements);
   1145    }
   1146 
   1147    this->result.file = PROGRAM_UNDEFINED;
   1148 
   1149    /* Storage for our result.  Ideally for an assignment we'd be using
   1150     * the actual storage for the result here, instead.
   1151     */
   1152    result_src = get_temp(ir->type);
   1153    /* convenience for the emit functions below. */
   1154    result_dst = dst_reg(result_src);
   1155    /* Limit writes to the channels that will be used by result_src later.
   1156     * This does limit this temp's use as a temporary for multi-instruction
   1157     * sequences.
   1158     */
   1159    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
   1160 
   1161    switch (ir->operation) {
   1162    case ir_unop_logic_not:
   1163       /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
   1164        * older GPUs implement SEQ using multiple instructions (i915 uses two
   1165        * SGE instructions and a MUL instruction).  Since our logic values are
   1166        * 0.0 and 1.0, 1-x also implements !x.
   1167        */
   1168       op[0].negate = ~op[0].negate;
   1169       emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
   1170       break;
   1171    case ir_unop_neg:
   1172       op[0].negate = ~op[0].negate;
   1173       result_src = op[0];
   1174       break;
   1175    case ir_unop_abs:
   1176       emit(ir, OPCODE_ABS, result_dst, op[0]);
   1177       break;
   1178    case ir_unop_sign:
   1179       emit(ir, OPCODE_SSG, result_dst, op[0]);
   1180       break;
   1181    case ir_unop_rcp:
   1182       emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
   1183       break;
   1184 
   1185    case ir_unop_exp2:
   1186       emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
   1187       break;
   1188    case ir_unop_exp:
   1189    case ir_unop_log:
   1190       assert(!"not reached: should be handled by ir_explog_to_explog2");
   1191       break;
   1192    case ir_unop_log2:
   1193       emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
   1194       break;
   1195    case ir_unop_sin:
   1196       emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
   1197       break;
   1198    case ir_unop_cos:
   1199       emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
   1200       break;
   1201    case ir_unop_sin_reduced:
   1202       emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
   1203       break;
   1204    case ir_unop_cos_reduced:
   1205       emit_scs(ir, OPCODE_COS, result_dst, op[0]);
   1206       break;
   1207 
   1208    case ir_unop_dFdx:
   1209       emit(ir, OPCODE_DDX, result_dst, op[0]);
   1210       break;
   1211    case ir_unop_dFdy:
   1212       emit(ir, OPCODE_DDY, result_dst, op[0]);
   1213       break;
   1214 
   1215    case ir_unop_noise: {
   1216       const enum prog_opcode opcode =
   1217 	 prog_opcode(OPCODE_NOISE1
   1218 		     + (ir->operands[0]->type->vector_elements) - 1);
   1219       assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4));
   1220 
   1221       emit(ir, opcode, result_dst, op[0]);
   1222       break;
   1223    }
   1224 
   1225    case ir_binop_add:
   1226       emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
   1227       break;
   1228    case ir_binop_sub:
   1229       emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
   1230       break;
   1231 
   1232    case ir_binop_mul:
   1233       emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
   1234       break;
   1235    case ir_binop_div:
   1236       assert(!"not reached: should be handled by ir_div_to_mul_rcp");
   1237       break;
   1238    case ir_binop_mod:
   1239       /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
   1240       assert(ir->type->is_integer());
   1241       emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
   1242       break;
   1243 
   1244    case ir_binop_less:
   1245       emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
   1246       break;
   1247    case ir_binop_greater:
   1248       emit(ir, OPCODE_SGT, result_dst, op[0], op[1]);
   1249       break;
   1250    case ir_binop_lequal:
   1251       emit(ir, OPCODE_SLE, result_dst, op[0], op[1]);
   1252       break;
   1253    case ir_binop_gequal:
   1254       emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
   1255       break;
   1256    case ir_binop_equal:
   1257       emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
   1258       break;
   1259    case ir_binop_nequal:
   1260       emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
   1261       break;
   1262    case ir_binop_all_equal:
   1263       /* "==" operator producing a scalar boolean. */
   1264       if (ir->operands[0]->type->is_vector() ||
   1265 	  ir->operands[1]->type->is_vector()) {
   1266 	 src_reg temp = get_temp(glsl_type::vec4_type);
   1267 	 emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
   1268 
   1269 	 /* After the dot-product, the value will be an integer on the
   1270 	  * range [0,4].  Zero becomes 1.0, and positive values become zero.
   1271 	  */
   1272 	 emit_dp(ir, result_dst, temp, temp, vector_elements);
   1273 
   1274 	 /* Negating the result of the dot-product gives values on the range
   1275 	  * [-4, 0].  Zero becomes 1.0, and negative values become zero.  This
   1276 	  * achieved using SGE.
   1277 	  */
   1278 	 src_reg sge_src = result_src;
   1279 	 sge_src.negate = ~sge_src.negate;
   1280 	 emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
   1281       } else {
   1282 	 emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
   1283       }
   1284       break;
   1285    case ir_binop_any_nequal:
   1286       /* "!=" operator producing a scalar boolean. */
   1287       if (ir->operands[0]->type->is_vector() ||
   1288 	  ir->operands[1]->type->is_vector()) {
   1289 	 src_reg temp = get_temp(glsl_type::vec4_type);
   1290 	 emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
   1291 
   1292 	 /* After the dot-product, the value will be an integer on the
   1293 	  * range [0,4].  Zero stays zero, and positive values become 1.0.
   1294 	  */
   1295 	 ir_to_mesa_instruction *const dp =
   1296 	    emit_dp(ir, result_dst, temp, temp, vector_elements);
   1297 	 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
   1298 	    /* The clamping to [0,1] can be done for free in the fragment
   1299 	     * shader with a saturate.
   1300 	     */
   1301 	    dp->saturate = true;
   1302 	 } else {
   1303 	    /* Negating the result of the dot-product gives values on the range
   1304 	     * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
   1305 	     * achieved using SLT.
   1306 	     */
   1307 	    src_reg slt_src = result_src;
   1308 	    slt_src.negate = ~slt_src.negate;
   1309 	    emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
   1310 	 }
   1311       } else {
   1312 	 emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
   1313       }
   1314       break;
   1315 
   1316    case ir_unop_any: {
   1317       assert(ir->operands[0]->type->is_vector());
   1318 
   1319       /* After the dot-product, the value will be an integer on the
   1320        * range [0,4].  Zero stays zero, and positive values become 1.0.
   1321        */
   1322       ir_to_mesa_instruction *const dp =
   1323 	 emit_dp(ir, result_dst, op[0], op[0],
   1324 		 ir->operands[0]->type->vector_elements);
   1325       if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
   1326 	 /* The clamping to [0,1] can be done for free in the fragment
   1327 	  * shader with a saturate.
   1328 	  */
   1329 	 dp->saturate = true;
   1330       } else {
   1331 	 /* Negating the result of the dot-product gives values on the range
   1332 	  * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
   1333 	  * is achieved using SLT.
   1334 	  */
   1335 	 src_reg slt_src = result_src;
   1336 	 slt_src.negate = ~slt_src.negate;
   1337 	 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
   1338       }
   1339       break;
   1340    }
   1341 
   1342    case ir_binop_logic_xor:
   1343       emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
   1344       break;
   1345 
   1346    case ir_binop_logic_or: {
   1347       /* After the addition, the value will be an integer on the
   1348        * range [0,2].  Zero stays zero, and positive values become 1.0.
   1349        */
   1350       ir_to_mesa_instruction *add =
   1351 	 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
   1352       if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
   1353 	 /* The clamping to [0,1] can be done for free in the fragment
   1354 	  * shader with a saturate.
   1355 	  */
   1356 	 add->saturate = true;
   1357       } else {
   1358 	 /* Negating the result of the addition gives values on the range
   1359 	  * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
   1360 	  * is achieved using SLT.
   1361 	  */
   1362 	 src_reg slt_src = result_src;
   1363 	 slt_src.negate = ~slt_src.negate;
   1364 	 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
   1365       }
   1366       break;
   1367    }
   1368 
   1369    case ir_binop_logic_and:
   1370       /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
   1371       emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
   1372       break;
   1373 
   1374    case ir_binop_dot:
   1375       assert(ir->operands[0]->type->is_vector());
   1376       assert(ir->operands[0]->type == ir->operands[1]->type);
   1377       emit_dp(ir, result_dst, op[0], op[1],
   1378 	      ir->operands[0]->type->vector_elements);
   1379       break;
   1380 
   1381    case ir_unop_sqrt:
   1382       /* sqrt(x) = x * rsq(x). */
   1383       emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
   1384       emit(ir, OPCODE_MUL, result_dst, result_src, op[0]);
   1385       /* For incoming channels <= 0, set the result to 0. */
   1386       op[0].negate = ~op[0].negate;
   1387       emit(ir, OPCODE_CMP, result_dst,
   1388 			  op[0], result_src, src_reg_for_float(0.0));
   1389       break;
   1390    case ir_unop_rsq:
   1391       emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
   1392       break;
   1393    case ir_unop_i2f:
   1394    case ir_unop_u2f:
   1395    case ir_unop_b2f:
   1396    case ir_unop_b2i:
   1397    case ir_unop_i2u:
   1398    case ir_unop_u2i:
   1399       /* Mesa IR lacks types, ints are stored as truncated floats. */
   1400       result_src = op[0];
   1401       break;
   1402    case ir_unop_f2i:
   1403    case ir_unop_f2u:
   1404       emit(ir, OPCODE_TRUNC, result_dst, op[0]);
   1405       break;
   1406    case ir_unop_f2b:
   1407    case ir_unop_i2b:
   1408       emit(ir, OPCODE_SNE, result_dst,
   1409 			  op[0], src_reg_for_float(0.0));
   1410       break;
   1411    case ir_unop_bitcast_f2i: // Ignore these 4, they can't happen here anyway
   1412    case ir_unop_bitcast_f2u:
   1413    case ir_unop_bitcast_i2f:
   1414    case ir_unop_bitcast_u2f:
   1415       break;
   1416    case ir_unop_trunc:
   1417       emit(ir, OPCODE_TRUNC, result_dst, op[0]);
   1418       break;
   1419    case ir_unop_ceil:
   1420       op[0].negate = ~op[0].negate;
   1421       emit(ir, OPCODE_FLR, result_dst, op[0]);
   1422       result_src.negate = ~result_src.negate;
   1423       break;
   1424    case ir_unop_floor:
   1425       emit(ir, OPCODE_FLR, result_dst, op[0]);
   1426       break;
   1427    case ir_unop_fract:
   1428       emit(ir, OPCODE_FRC, result_dst, op[0]);
   1429       break;
   1430 
   1431    case ir_binop_min:
   1432       emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
   1433       break;
   1434    case ir_binop_max:
   1435       emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
   1436       break;
   1437    case ir_binop_pow:
   1438       emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]);
   1439       break;
   1440 
   1441       /* GLSL 1.30 integer ops are unsupported in Mesa IR, but since
   1442        * hardware backends have no way to avoid Mesa IR generation
   1443        * even if they don't use it, we need to emit "something" and
   1444        * continue.
   1445        */
   1446    case ir_binop_lshift:
   1447    case ir_binop_rshift:
   1448    case ir_binop_bit_and:
   1449    case ir_binop_bit_xor:
   1450    case ir_binop_bit_or:
   1451       emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
   1452       break;
   1453 
   1454    case ir_unop_bit_not:
   1455    case ir_unop_round_even:
   1456       emit(ir, OPCODE_MOV, result_dst, op[0]);
   1457       break;
   1458 
   1459    case ir_binop_ubo_load:
   1460       assert(!"not supported");
   1461       break;
   1462 
   1463    case ir_quadop_vector:
   1464       /* This operation should have already been handled.
   1465        */
   1466       assert(!"Should not get here.");
   1467       break;
   1468    }
   1469 
   1470    this->result = result_src;
   1471 }
   1472 
   1473 
   1474 void
   1475 ir_to_mesa_visitor::visit(ir_swizzle *ir)
   1476 {
   1477    src_reg src;
   1478    int i;
   1479    int swizzle[4];
   1480 
   1481    /* Note that this is only swizzles in expressions, not those on the left
   1482     * hand side of an assignment, which do write masking.  See ir_assignment
   1483     * for that.
   1484     */
   1485 
   1486    ir->val->accept(this);
   1487    src = this->result;
   1488    assert(src.file != PROGRAM_UNDEFINED);
   1489 
   1490    for (i = 0; i < 4; i++) {
   1491       if (i < ir->type->vector_elements) {
   1492 	 switch (i) {
   1493 	 case 0:
   1494 	    swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
   1495 	    break;
   1496 	 case 1:
   1497 	    swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
   1498 	    break;
   1499 	 case 2:
   1500 	    swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
   1501 	    break;
   1502 	 case 3:
   1503 	    swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
   1504 	    break;
   1505 	 }
   1506       } else {
   1507 	 /* If the type is smaller than a vec4, replicate the last
   1508 	  * channel out.
   1509 	  */
   1510 	 swizzle[i] = swizzle[ir->type->vector_elements - 1];
   1511       }
   1512    }
   1513 
   1514    src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
   1515 
   1516    this->result = src;
   1517 }
   1518 
   1519 void
   1520 ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
   1521 {
   1522    variable_storage *entry = find_variable_storage(ir->var);
   1523    ir_variable *var = ir->var;
   1524 
   1525    if (!entry) {
   1526       switch (var->mode) {
   1527       case ir_var_uniform:
   1528 	 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
   1529 					       var->location);
   1530 	 this->variables.push_tail(entry);
   1531 	 break;
   1532       case ir_var_in:
   1533       case ir_var_inout:
   1534 	 /* The linker assigns locations for varyings and attributes,
   1535 	  * including deprecated builtins (like gl_Color),
   1536 	  * user-assigned generic attributes (glBindVertexLocation),
   1537 	  * and user-defined varyings.
   1538 	  *
   1539 	  * FINISHME: We would hit this path for function arguments.  Fix!
   1540 	  */
   1541 	 assert(var->location != -1);
   1542          entry = new(mem_ctx) variable_storage(var,
   1543                                                PROGRAM_INPUT,
   1544                                                var->location);
   1545          break;
   1546       case ir_var_out:
   1547 	 assert(var->location != -1);
   1548          entry = new(mem_ctx) variable_storage(var,
   1549                                                PROGRAM_OUTPUT,
   1550                                                var->location);
   1551 	 break;
   1552       case ir_var_system_value:
   1553          entry = new(mem_ctx) variable_storage(var,
   1554                                                PROGRAM_SYSTEM_VALUE,
   1555                                                var->location);
   1556          break;
   1557       case ir_var_auto:
   1558       case ir_var_temporary:
   1559 	 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
   1560 					       this->next_temp);
   1561 	 this->variables.push_tail(entry);
   1562 
   1563 	 next_temp += type_size(var->type);
   1564 	 break;
   1565       }
   1566 
   1567       if (!entry) {
   1568 	 printf("Failed to make storage for %s\n", var->name);
   1569 	 exit(1);
   1570       }
   1571    }
   1572 
   1573    this->result = src_reg(entry->file, entry->index, var->type);
   1574 }
   1575 
   1576 void
   1577 ir_to_mesa_visitor::visit(ir_dereference_array *ir)
   1578 {
   1579    ir_constant *index;
   1580    src_reg src;
   1581    int element_size = type_size(ir->type);
   1582 
   1583    index = ir->array_index->constant_expression_value();
   1584 
   1585    ir->array->accept(this);
   1586    src = this->result;
   1587 
   1588    if (index) {
   1589       src.index += index->value.i[0] * element_size;
   1590    } else {
   1591       /* Variable index array dereference.  It eats the "vec4" of the
   1592        * base of the array and an index that offsets the Mesa register
   1593        * index.
   1594        */
   1595       ir->array_index->accept(this);
   1596 
   1597       src_reg index_reg;
   1598 
   1599       if (element_size == 1) {
   1600 	 index_reg = this->result;
   1601       } else {
   1602 	 index_reg = get_temp(glsl_type::float_type);
   1603 
   1604 	 emit(ir, OPCODE_MUL, dst_reg(index_reg),
   1605 	      this->result, src_reg_for_float(element_size));
   1606       }
   1607 
   1608       /* If there was already a relative address register involved, add the
   1609        * new and the old together to get the new offset.
   1610        */
   1611       if (src.reladdr != NULL)  {
   1612 	 src_reg accum_reg = get_temp(glsl_type::float_type);
   1613 
   1614 	 emit(ir, OPCODE_ADD, dst_reg(accum_reg),
   1615 	      index_reg, *src.reladdr);
   1616 
   1617 	 index_reg = accum_reg;
   1618       }
   1619 
   1620       src.reladdr = ralloc(mem_ctx, src_reg);
   1621       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
   1622    }
   1623 
   1624    /* If the type is smaller than a vec4, replicate the last channel out. */
   1625    if (ir->type->is_scalar() || ir->type->is_vector())
   1626       src.swizzle = swizzle_for_size(ir->type->vector_elements);
   1627    else
   1628       src.swizzle = SWIZZLE_NOOP;
   1629 
   1630    this->result = src;
   1631 }
   1632 
   1633 void
   1634 ir_to_mesa_visitor::visit(ir_dereference_record *ir)
   1635 {
   1636    unsigned int i;
   1637    const glsl_type *struct_type = ir->record->type;
   1638    int offset = 0;
   1639 
   1640    ir->record->accept(this);
   1641 
   1642    for (i = 0; i < struct_type->length; i++) {
   1643       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
   1644 	 break;
   1645       offset += type_size(struct_type->fields.structure[i].type);
   1646    }
   1647 
   1648    /* If the type is smaller than a vec4, replicate the last channel out. */
   1649    if (ir->type->is_scalar() || ir->type->is_vector())
   1650       this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
   1651    else
   1652       this->result.swizzle = SWIZZLE_NOOP;
   1653 
   1654    this->result.index += offset;
   1655 }
   1656 
   1657 /**
   1658  * We want to be careful in assignment setup to hit the actual storage
   1659  * instead of potentially using a temporary like we might with the
   1660  * ir_dereference handler.
   1661  */
   1662 static dst_reg
   1663 get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v)
   1664 {
   1665    /* The LHS must be a dereference.  If the LHS is a variable indexed array
   1666     * access of a vector, it must be separated into a series conditional moves
   1667     * before reaching this point (see ir_vec_index_to_cond_assign).
   1668     */
   1669    assert(ir->as_dereference());
   1670    ir_dereference_array *deref_array = ir->as_dereference_array();
   1671    if (deref_array) {
   1672       assert(!deref_array->array->type->is_vector());
   1673    }
   1674 
   1675    /* Use the rvalue deref handler for the most part.  We'll ignore
   1676     * swizzles in it and write swizzles using writemask, though.
   1677     */
   1678    ir->accept(v);
   1679    return dst_reg(v->result);
   1680 }
   1681 
   1682 /**
   1683  * Process the condition of a conditional assignment
   1684  *
   1685  * Examines the condition of a conditional assignment to generate the optimal
   1686  * first operand of a \c CMP instruction.  If the condition is a relational
   1687  * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
   1688  * used as the source for the \c CMP instruction.  Otherwise the comparison
   1689  * is processed to a boolean result, and the boolean result is used as the
   1690  * operand to the CMP instruction.
   1691  */
   1692 bool
   1693 ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir)
   1694 {
   1695    ir_rvalue *src_ir = ir;
   1696    bool negate = true;
   1697    bool switch_order = false;
   1698 
   1699    ir_expression *const expr = ir->as_expression();
   1700    if ((expr != NULL) && (expr->get_num_operands() == 2)) {
   1701       bool zero_on_left = false;
   1702 
   1703       if (expr->operands[0]->is_zero()) {
   1704 	 src_ir = expr->operands[1];
   1705 	 zero_on_left = true;
   1706       } else if (expr->operands[1]->is_zero()) {
   1707 	 src_ir = expr->operands[0];
   1708 	 zero_on_left = false;
   1709       }
   1710 
   1711       /*      a is -  0  +            -  0  +
   1712        * (a <  0)  T  F  F  ( a < 0)  T  F  F
   1713        * (0 <  a)  F  F  T  (-a < 0)  F  F  T
   1714        * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
   1715        * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
   1716        * (a >  0)  F  F  T  (-a < 0)  F  F  T
   1717        * (0 >  a)  T  F  F  ( a < 0)  T  F  F
   1718        * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
   1719        * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
   1720        *
   1721        * Note that exchanging the order of 0 and 'a' in the comparison simply
   1722        * means that the value of 'a' should be negated.
   1723        */
   1724       if (src_ir != ir) {
   1725 	 switch (expr->operation) {
   1726 	 case ir_binop_less:
   1727 	    switch_order = false;
   1728 	    negate = zero_on_left;
   1729 	    break;
   1730 
   1731 	 case ir_binop_greater:
   1732 	    switch_order = false;
   1733 	    negate = !zero_on_left;
   1734 	    break;
   1735 
   1736 	 case ir_binop_lequal:
   1737 	    switch_order = true;
   1738 	    negate = !zero_on_left;
   1739 	    break;
   1740 
   1741 	 case ir_binop_gequal:
   1742 	    switch_order = true;
   1743 	    negate = zero_on_left;
   1744 	    break;
   1745 
   1746 	 default:
   1747 	    /* This isn't the right kind of comparison afterall, so make sure
   1748 	     * the whole condition is visited.
   1749 	     */
   1750 	    src_ir = ir;
   1751 	    break;
   1752 	 }
   1753       }
   1754    }
   1755 
   1756    src_ir->accept(this);
   1757 
   1758    /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
   1759     * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
   1760     * choose which value OPCODE_CMP produces without an extra instruction
   1761     * computing the condition.
   1762     */
   1763    if (negate)
   1764       this->result.negate = ~this->result.negate;
   1765 
   1766    return switch_order;
   1767 }
   1768 
   1769 void
   1770 ir_to_mesa_visitor::visit(ir_assignment *ir)
   1771 {
   1772    dst_reg l;
   1773    src_reg r;
   1774    int i;
   1775 
   1776    ir->rhs->accept(this);
   1777    r = this->result;
   1778 
   1779    l = get_assignment_lhs(ir->lhs, this);
   1780 
   1781    /* FINISHME: This should really set to the correct maximal writemask for each
   1782     * FINISHME: component written (in the loops below).  This case can only
   1783     * FINISHME: occur for matrices, arrays, and structures.
   1784     */
   1785    if (ir->write_mask == 0) {
   1786       assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
   1787       l.writemask = WRITEMASK_XYZW;
   1788    } else if (ir->lhs->type->is_scalar()) {
   1789       /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
   1790        * FINISHME: W component of fragment shader output zero, work correctly.
   1791        */
   1792       l.writemask = WRITEMASK_XYZW;
   1793    } else {
   1794       int swizzles[4];
   1795       int first_enabled_chan = 0;
   1796       int rhs_chan = 0;
   1797 
   1798       assert(ir->lhs->type->is_vector());
   1799       l.writemask = ir->write_mask;
   1800 
   1801       for (int i = 0; i < 4; i++) {
   1802 	 if (l.writemask & (1 << i)) {
   1803 	    first_enabled_chan = GET_SWZ(r.swizzle, i);
   1804 	    break;
   1805 	 }
   1806       }
   1807 
   1808       /* Swizzle a small RHS vector into the channels being written.
   1809        *
   1810        * glsl ir treats write_mask as dictating how many channels are
   1811        * present on the RHS while Mesa IR treats write_mask as just
   1812        * showing which channels of the vec4 RHS get written.
   1813        */
   1814       for (int i = 0; i < 4; i++) {
   1815 	 if (l.writemask & (1 << i))
   1816 	    swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
   1817 	 else
   1818 	    swizzles[i] = first_enabled_chan;
   1819       }
   1820       r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
   1821 				swizzles[2], swizzles[3]);
   1822    }
   1823 
   1824    assert(l.file != PROGRAM_UNDEFINED);
   1825    assert(r.file != PROGRAM_UNDEFINED);
   1826 
   1827    if (ir->condition) {
   1828       const bool switch_order = this->process_move_condition(ir->condition);
   1829       src_reg condition = this->result;
   1830 
   1831       for (i = 0; i < type_size(ir->lhs->type); i++) {
   1832 	 if (switch_order) {
   1833 	    emit(ir, OPCODE_CMP, l, condition, src_reg(l), r);
   1834 	 } else {
   1835 	    emit(ir, OPCODE_CMP, l, condition, r, src_reg(l));
   1836 	 }
   1837 
   1838 	 l.index++;
   1839 	 r.index++;
   1840       }
   1841    } else {
   1842       for (i = 0; i < type_size(ir->lhs->type); i++) {
   1843 	 emit(ir, OPCODE_MOV, l, r);
   1844 	 l.index++;
   1845 	 r.index++;
   1846       }
   1847    }
   1848 }
   1849 
   1850 
   1851 void
   1852 ir_to_mesa_visitor::visit(ir_constant *ir)
   1853 {
   1854    src_reg src;
   1855    GLfloat stack_vals[4] = { 0 };
   1856    GLfloat *values = stack_vals;
   1857    unsigned int i;
   1858 
   1859    /* Unfortunately, 4 floats is all we can get into
   1860     * _mesa_add_unnamed_constant.  So, make a temp to store an
   1861     * aggregate constant and move each constant value into it.  If we
   1862     * get lucky, copy propagation will eliminate the extra moves.
   1863     */
   1864 
   1865    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
   1866       src_reg temp_base = get_temp(ir->type);
   1867       dst_reg temp = dst_reg(temp_base);
   1868 
   1869       foreach_iter(exec_list_iterator, iter, ir->components) {
   1870 	 ir_constant *field_value = (ir_constant *)iter.get();
   1871 	 int size = type_size(field_value->type);
   1872 
   1873 	 assert(size > 0);
   1874 
   1875 	 field_value->accept(this);
   1876 	 src = this->result;
   1877 
   1878 	 for (i = 0; i < (unsigned int)size; i++) {
   1879 	    emit(ir, OPCODE_MOV, temp, src);
   1880 
   1881 	    src.index++;
   1882 	    temp.index++;
   1883 	 }
   1884       }
   1885       this->result = temp_base;
   1886       return;
   1887    }
   1888 
   1889    if (ir->type->is_array()) {
   1890       src_reg temp_base = get_temp(ir->type);
   1891       dst_reg temp = dst_reg(temp_base);
   1892       int size = type_size(ir->type->fields.array);
   1893 
   1894       assert(size > 0);
   1895 
   1896       for (i = 0; i < ir->type->length; i++) {
   1897 	 ir->array_elements[i]->accept(this);
   1898 	 src = this->result;
   1899 	 for (int j = 0; j < size; j++) {
   1900 	    emit(ir, OPCODE_MOV, temp, src);
   1901 
   1902 	    src.index++;
   1903 	    temp.index++;
   1904 	 }
   1905       }
   1906       this->result = temp_base;
   1907       return;
   1908    }
   1909 
   1910    if (ir->type->is_matrix()) {
   1911       src_reg mat = get_temp(ir->type);
   1912       dst_reg mat_column = dst_reg(mat);
   1913 
   1914       for (i = 0; i < ir->type->matrix_columns; i++) {
   1915 	 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
   1916 	 values = &ir->value.f[i * ir->type->vector_elements];
   1917 
   1918 	 src = src_reg(PROGRAM_CONSTANT, -1, NULL);
   1919 	 src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
   1920 						(gl_constant_value *) values,
   1921 						ir->type->vector_elements,
   1922 						&src.swizzle);
   1923 	 emit(ir, OPCODE_MOV, mat_column, src);
   1924 
   1925 	 mat_column.index++;
   1926       }
   1927 
   1928       this->result = mat;
   1929       return;
   1930    }
   1931 
   1932    src.file = PROGRAM_CONSTANT;
   1933    switch (ir->type->base_type) {
   1934    case GLSL_TYPE_FLOAT:
   1935       values = &ir->value.f[0];
   1936       break;
   1937    case GLSL_TYPE_UINT:
   1938       for (i = 0; i < ir->type->vector_elements; i++) {
   1939 	 values[i] = ir->value.u[i];
   1940       }
   1941       break;
   1942    case GLSL_TYPE_INT:
   1943       for (i = 0; i < ir->type->vector_elements; i++) {
   1944 	 values[i] = ir->value.i[i];
   1945       }
   1946       break;
   1947    case GLSL_TYPE_BOOL:
   1948       for (i = 0; i < ir->type->vector_elements; i++) {
   1949 	 values[i] = ir->value.b[i];
   1950       }
   1951       break;
   1952    default:
   1953       assert(!"Non-float/uint/int/bool constant");
   1954    }
   1955 
   1956    this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
   1957    this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
   1958 						   (gl_constant_value *) values,
   1959 						   ir->type->vector_elements,
   1960 						   &this->result.swizzle);
   1961 }
   1962 
   1963 void
   1964 ir_to_mesa_visitor::visit(ir_call *ir)
   1965 {
   1966    assert(!"ir_to_mesa: All function calls should have been inlined by now.");
   1967 }
   1968 
   1969 void
   1970 ir_to_mesa_visitor::visit(ir_texture *ir)
   1971 {
   1972    src_reg result_src, coord, lod_info, projector, dx, dy;
   1973    dst_reg result_dst, coord_dst;
   1974    ir_to_mesa_instruction *inst = NULL;
   1975    prog_opcode opcode = OPCODE_NOP;
   1976 
   1977    if (ir->op == ir_txs)
   1978       this->result = src_reg_for_float(0.0);
   1979    else
   1980       ir->coordinate->accept(this);
   1981 
   1982    /* Put our coords in a temp.  We'll need to modify them for shadow,
   1983     * projection, or LOD, so the only case we'd use it as is is if
   1984     * we're doing plain old texturing.  Mesa IR optimization should
   1985     * handle cleaning up our mess in that case.
   1986     */
   1987    coord = get_temp(glsl_type::vec4_type);
   1988    coord_dst = dst_reg(coord);
   1989    emit(ir, OPCODE_MOV, coord_dst, this->result);
   1990 
   1991    if (ir->projector) {
   1992       ir->projector->accept(this);
   1993       projector = this->result;
   1994    }
   1995 
   1996    /* Storage for our result.  Ideally for an assignment we'd be using
   1997     * the actual storage for the result here, instead.
   1998     */
   1999    result_src = get_temp(glsl_type::vec4_type);
   2000    result_dst = dst_reg(result_src);
   2001 
   2002    switch (ir->op) {
   2003    case ir_tex:
   2004    case ir_txs:
   2005       opcode = OPCODE_TEX;
   2006       break;
   2007    case ir_txb:
   2008       opcode = OPCODE_TXB;
   2009       ir->lod_info.bias->accept(this);
   2010       lod_info = this->result;
   2011       break;
   2012    case ir_txf:
   2013       /* Pretend to be TXL so the sampler, coordinate, lod are available */
   2014    case ir_txl:
   2015       opcode = OPCODE_TXL;
   2016       ir->lod_info.lod->accept(this);
   2017       lod_info = this->result;
   2018       break;
   2019    case ir_txd:
   2020       opcode = OPCODE_TXD;
   2021       ir->lod_info.grad.dPdx->accept(this);
   2022       dx = this->result;
   2023       ir->lod_info.grad.dPdy->accept(this);
   2024       dy = this->result;
   2025       break;
   2026    }
   2027 
   2028    const glsl_type *sampler_type = ir->sampler->type;
   2029 
   2030    if (ir->projector) {
   2031       if (opcode == OPCODE_TEX) {
   2032 	 /* Slot the projector in as the last component of the coord. */
   2033 	 coord_dst.writemask = WRITEMASK_W;
   2034 	 emit(ir, OPCODE_MOV, coord_dst, projector);
   2035 	 coord_dst.writemask = WRITEMASK_XYZW;
   2036 	 opcode = OPCODE_TXP;
   2037       } else {
   2038 	 src_reg coord_w = coord;
   2039 	 coord_w.swizzle = SWIZZLE_WWWW;
   2040 
   2041 	 /* For the other TEX opcodes there's no projective version
   2042 	  * since the last slot is taken up by lod info.  Do the
   2043 	  * projective divide now.
   2044 	  */
   2045 	 coord_dst.writemask = WRITEMASK_W;
   2046 	 emit(ir, OPCODE_RCP, coord_dst, projector);
   2047 
   2048 	 /* In the case where we have to project the coordinates "by hand,"
   2049 	  * the shadow comparitor value must also be projected.
   2050 	  */
   2051 	 src_reg tmp_src = coord;
   2052 	 if (ir->shadow_comparitor) {
   2053 	    /* Slot the shadow value in as the second to last component of the
   2054 	     * coord.
   2055 	     */
   2056 	    ir->shadow_comparitor->accept(this);
   2057 
   2058 	    tmp_src = get_temp(glsl_type::vec4_type);
   2059 	    dst_reg tmp_dst = dst_reg(tmp_src);
   2060 
   2061 	    /* Projective division not allowed for array samplers. */
   2062 	    assert(!sampler_type->sampler_array);
   2063 
   2064 	    tmp_dst.writemask = WRITEMASK_Z;
   2065 	    emit(ir, OPCODE_MOV, tmp_dst, this->result);
   2066 
   2067 	    tmp_dst.writemask = WRITEMASK_XY;
   2068 	    emit(ir, OPCODE_MOV, tmp_dst, coord);
   2069 	 }
   2070 
   2071 	 coord_dst.writemask = WRITEMASK_XYZ;
   2072 	 emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w);
   2073 
   2074 	 coord_dst.writemask = WRITEMASK_XYZW;
   2075 	 coord.swizzle = SWIZZLE_XYZW;
   2076       }
   2077    }
   2078 
   2079    /* If projection is done and the opcode is not OPCODE_TXP, then the shadow
   2080     * comparitor was put in the correct place (and projected) by the code,
   2081     * above, that handles by-hand projection.
   2082     */
   2083    if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) {
   2084       /* Slot the shadow value in as the second to last component of the
   2085        * coord.
   2086        */
   2087       ir->shadow_comparitor->accept(this);
   2088 
   2089       /* XXX This will need to be updated for cubemap array samplers. */
   2090       if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
   2091           sampler_type->sampler_array) {
   2092          coord_dst.writemask = WRITEMASK_W;
   2093       } else {
   2094          coord_dst.writemask = WRITEMASK_Z;
   2095       }
   2096 
   2097       emit(ir, OPCODE_MOV, coord_dst, this->result);
   2098       coord_dst.writemask = WRITEMASK_XYZW;
   2099    }
   2100 
   2101    if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
   2102       /* Mesa IR stores lod or lod bias in the last channel of the coords. */
   2103       coord_dst.writemask = WRITEMASK_W;
   2104       emit(ir, OPCODE_MOV, coord_dst, lod_info);
   2105       coord_dst.writemask = WRITEMASK_XYZW;
   2106    }
   2107 
   2108    if (opcode == OPCODE_TXD)
   2109       inst = emit(ir, opcode, result_dst, coord, dx, dy);
   2110    else
   2111       inst = emit(ir, opcode, result_dst, coord);
   2112 
   2113    if (ir->shadow_comparitor)
   2114       inst->tex_shadow = GL_TRUE;
   2115 
   2116    inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
   2117 						   this->shader_program,
   2118 						   this->prog);
   2119 
   2120    switch (sampler_type->sampler_dimensionality) {
   2121    case GLSL_SAMPLER_DIM_1D:
   2122       inst->tex_target = (sampler_type->sampler_array)
   2123 	 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
   2124       break;
   2125    case GLSL_SAMPLER_DIM_2D:
   2126       inst->tex_target = (sampler_type->sampler_array)
   2127 	 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
   2128       break;
   2129    case GLSL_SAMPLER_DIM_3D:
   2130       inst->tex_target = TEXTURE_3D_INDEX;
   2131       break;
   2132    case GLSL_SAMPLER_DIM_CUBE:
   2133       inst->tex_target = TEXTURE_CUBE_INDEX;
   2134       break;
   2135    case GLSL_SAMPLER_DIM_RECT:
   2136       inst->tex_target = TEXTURE_RECT_INDEX;
   2137       break;
   2138    case GLSL_SAMPLER_DIM_BUF:
   2139       assert(!"FINISHME: Implement ARB_texture_buffer_object");
   2140       break;
   2141    case GLSL_SAMPLER_DIM_EXTERNAL:
   2142       inst->tex_target = TEXTURE_EXTERNAL_INDEX;
   2143       break;
   2144    default:
   2145       assert(!"Should not get here.");
   2146    }
   2147 
   2148    this->result = result_src;
   2149 }
   2150 
   2151 void
   2152 ir_to_mesa_visitor::visit(ir_return *ir)
   2153 {
   2154    /* Non-void functions should have been inlined.  We may still emit RETs
   2155     * from main() unless the EmitNoMainReturn option is set.
   2156     */
   2157    assert(!ir->get_value());
   2158    emit(ir, OPCODE_RET);
   2159 }
   2160 
   2161 void
   2162 ir_to_mesa_visitor::visit(ir_discard *ir)
   2163 {
   2164    if (ir->condition) {
   2165       ir->condition->accept(this);
   2166       this->result.negate = ~this->result.negate;
   2167       emit(ir, OPCODE_KIL, undef_dst, this->result);
   2168    } else {
   2169       emit(ir, OPCODE_KIL_NV);
   2170    }
   2171 }
   2172 
   2173 void
   2174 ir_to_mesa_visitor::visit(ir_if *ir)
   2175 {
   2176    ir_to_mesa_instruction *cond_inst, *if_inst;
   2177    ir_to_mesa_instruction *prev_inst;
   2178 
   2179    prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
   2180 
   2181    ir->condition->accept(this);
   2182    assert(this->result.file != PROGRAM_UNDEFINED);
   2183 
   2184    if (this->options->EmitCondCodes) {
   2185       cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
   2186 
   2187       /* See if we actually generated any instruction for generating
   2188        * the condition.  If not, then cook up a move to a temp so we
   2189        * have something to set cond_update on.
   2190        */
   2191       if (cond_inst == prev_inst) {
   2192 	 src_reg temp = get_temp(glsl_type::bool_type);
   2193 	 cond_inst = emit(ir->condition, OPCODE_MOV, dst_reg(temp), result);
   2194       }
   2195       cond_inst->cond_update = GL_TRUE;
   2196 
   2197       if_inst = emit(ir->condition, OPCODE_IF);
   2198       if_inst->dst.cond_mask = COND_NE;
   2199    } else {
   2200       if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result);
   2201    }
   2202 
   2203    this->instructions.push_tail(if_inst);
   2204 
   2205    visit_exec_list(&ir->then_instructions, this);
   2206 
   2207    if (!ir->else_instructions.is_empty()) {
   2208       emit(ir->condition, OPCODE_ELSE);
   2209       visit_exec_list(&ir->else_instructions, this);
   2210    }
   2211 
   2212    if_inst = emit(ir->condition, OPCODE_ENDIF);
   2213 }
   2214 
   2215 ir_to_mesa_visitor::ir_to_mesa_visitor()
   2216 {
   2217    result.file = PROGRAM_UNDEFINED;
   2218    next_temp = 1;
   2219    next_signature_id = 1;
   2220    current_function = NULL;
   2221    mem_ctx = ralloc_context(NULL);
   2222 }
   2223 
   2224 ir_to_mesa_visitor::~ir_to_mesa_visitor()
   2225 {
   2226    ralloc_free(mem_ctx);
   2227 }
   2228 
   2229 static struct prog_src_register
   2230 mesa_src_reg_from_ir_src_reg(src_reg reg)
   2231 {
   2232    struct prog_src_register mesa_reg;
   2233 
   2234    mesa_reg.File = reg.file;
   2235    assert(reg.index < (1 << INST_INDEX_BITS));
   2236    mesa_reg.Index = reg.index;
   2237    mesa_reg.Swizzle = reg.swizzle;
   2238    mesa_reg.RelAddr = reg.reladdr != NULL;
   2239    mesa_reg.Negate = reg.negate;
   2240    mesa_reg.Abs = 0;
   2241    mesa_reg.HasIndex2 = GL_FALSE;
   2242    mesa_reg.RelAddr2 = 0;
   2243    mesa_reg.Index2 = 0;
   2244 
   2245    return mesa_reg;
   2246 }
   2247 
   2248 static void
   2249 set_branchtargets(ir_to_mesa_visitor *v,
   2250 		  struct prog_instruction *mesa_instructions,
   2251 		  int num_instructions)
   2252 {
   2253    int if_count = 0, loop_count = 0;
   2254    int *if_stack, *loop_stack;
   2255    int if_stack_pos = 0, loop_stack_pos = 0;
   2256    int i, j;
   2257 
   2258    for (i = 0; i < num_instructions; i++) {
   2259       switch (mesa_instructions[i].Opcode) {
   2260       case OPCODE_IF:
   2261 	 if_count++;
   2262 	 break;
   2263       case OPCODE_BGNLOOP:
   2264 	 loop_count++;
   2265 	 break;
   2266       case OPCODE_BRK:
   2267       case OPCODE_CONT:
   2268 	 mesa_instructions[i].BranchTarget = -1;
   2269 	 break;
   2270       default:
   2271 	 break;
   2272       }
   2273    }
   2274 
   2275    if_stack = rzalloc_array(v->mem_ctx, int, if_count);
   2276    loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);
   2277 
   2278    for (i = 0; i < num_instructions; i++) {
   2279       switch (mesa_instructions[i].Opcode) {
   2280       case OPCODE_IF:
   2281 	 if_stack[if_stack_pos] = i;
   2282 	 if_stack_pos++;
   2283 	 break;
   2284       case OPCODE_ELSE:
   2285 	 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
   2286 	 if_stack[if_stack_pos - 1] = i;
   2287 	 break;
   2288       case OPCODE_ENDIF:
   2289 	 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
   2290 	 if_stack_pos--;
   2291 	 break;
   2292       case OPCODE_BGNLOOP:
   2293 	 loop_stack[loop_stack_pos] = i;
   2294 	 loop_stack_pos++;
   2295 	 break;
   2296       case OPCODE_ENDLOOP:
   2297 	 loop_stack_pos--;
   2298 	 /* Rewrite any breaks/conts at this nesting level (haven't
   2299 	  * already had a BranchTarget assigned) to point to the end
   2300 	  * of the loop.
   2301 	  */
   2302 	 for (j = loop_stack[loop_stack_pos]; j < i; j++) {
   2303 	    if (mesa_instructions[j].Opcode == OPCODE_BRK ||
   2304 		mesa_instructions[j].Opcode == OPCODE_CONT) {
   2305 	       if (mesa_instructions[j].BranchTarget == -1) {
   2306 		  mesa_instructions[j].BranchTarget = i;
   2307 	       }
   2308 	    }
   2309 	 }
   2310 	 /* The loop ends point at each other. */
   2311 	 mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
   2312 	 mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
   2313 	 break;
   2314       case OPCODE_CAL:
   2315 	 foreach_iter(exec_list_iterator, iter, v->function_signatures) {
   2316 	    function_entry *entry = (function_entry *)iter.get();
   2317 
   2318 	    if (entry->sig_id == mesa_instructions[i].BranchTarget) {
   2319 	       mesa_instructions[i].BranchTarget = entry->inst;
   2320 	       break;
   2321 	    }
   2322 	 }
   2323 	 break;
   2324       default:
   2325 	 break;
   2326       }
   2327    }
   2328 }
   2329 
   2330 static void
   2331 print_program(struct prog_instruction *mesa_instructions,
   2332 	      ir_instruction **mesa_instruction_annotation,
   2333 	      int num_instructions)
   2334 {
   2335    ir_instruction *last_ir = NULL;
   2336    int i;
   2337    int indent = 0;
   2338 
   2339    for (i = 0; i < num_instructions; i++) {
   2340       struct prog_instruction *mesa_inst = mesa_instructions + i;
   2341       ir_instruction *ir = mesa_instruction_annotation[i];
   2342 
   2343       fprintf(stdout, "%3d: ", i);
   2344 
   2345       if (last_ir != ir && ir) {
   2346 	 int j;
   2347 
   2348 	 for (j = 0; j < indent; j++) {
   2349 	    fprintf(stdout, " ");
   2350 	 }
   2351 	 ir->print();
   2352 	 printf("\n");
   2353 	 last_ir = ir;
   2354 
   2355 	 fprintf(stdout, "     "); /* line number spacing. */
   2356       }
   2357 
   2358       indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
   2359 					    PROG_PRINT_DEBUG, NULL);
   2360    }
   2361 }
   2362 
   2363 class add_uniform_to_shader : public uniform_field_visitor {
   2364 public:
   2365    add_uniform_to_shader(struct gl_shader_program *shader_program,
   2366 			 struct gl_program_parameter_list *params)
   2367       : shader_program(shader_program), params(params), idx(-1)
   2368    {
   2369       /* empty */
   2370    }
   2371 
   2372    void process(ir_variable *var)
   2373    {
   2374       this->idx = -1;
   2375       this->uniform_field_visitor::process(var);
   2376 
   2377       var->location = this->idx;
   2378    }
   2379 
   2380 private:
   2381    virtual void visit_field(const glsl_type *type, const char *name);
   2382 
   2383    struct gl_shader_program *shader_program;
   2384    struct gl_program_parameter_list *params;
   2385    int idx;
   2386 };
   2387 
   2388 void
   2389 add_uniform_to_shader::visit_field(const glsl_type *type, const char *name)
   2390 {
   2391    unsigned int size;
   2392 
   2393    if (type->is_vector() || type->is_scalar()) {
   2394       size = type->vector_elements;
   2395    } else {
   2396       size = type_size(type) * 4;
   2397    }
   2398 
   2399    gl_register_file file;
   2400    if (type->is_sampler() ||
   2401        (type->is_array() && type->fields.array->is_sampler())) {
   2402       file = PROGRAM_SAMPLER;
   2403    } else {
   2404       file = PROGRAM_UNIFORM;
   2405    }
   2406 
   2407    int index = _mesa_lookup_parameter_index(params, -1, name);
   2408    if (index < 0) {
   2409       index = _mesa_add_parameter(params, file, name, size, type->gl_type,
   2410 				  NULL, NULL, 0x0);
   2411 
   2412       /* Sampler uniform values are stored in prog->SamplerUnits,
   2413        * and the entry in that array is selected by this index we
   2414        * store in ParameterValues[].
   2415        */
   2416       if (file == PROGRAM_SAMPLER) {
   2417 	 unsigned location;
   2418 	 const bool found =
   2419 	    this->shader_program->UniformHash->get(location,
   2420 						   params->Parameters[index].Name);
   2421 	 assert(found);
   2422 
   2423 	 if (!found)
   2424 	    return;
   2425 
   2426 	 struct gl_uniform_storage *storage =
   2427 	    &this->shader_program->UniformStorage[location];
   2428 
   2429 	 for (unsigned int j = 0; j < size / 4; j++)
   2430 	    params->ParameterValues[index + j][0].f = storage->sampler + j;
   2431       }
   2432    }
   2433 
   2434    /* The first part of the uniform that's processed determines the base
   2435     * location of the whole uniform (for structures).
   2436     */
   2437    if (this->idx < 0)
   2438       this->idx = index;
   2439 }
   2440 
   2441 /**
   2442  * Generate the program parameters list for the user uniforms in a shader
   2443  *
   2444  * \param shader_program Linked shader program.  This is only used to
   2445  *                       emit possible link errors to the info log.
   2446  * \param sh             Shader whose uniforms are to be processed.
   2447  * \param params         Parameter list to be filled in.
   2448  */
   2449 void
   2450 _mesa_generate_parameters_list_for_uniforms(struct gl_shader_program
   2451 					    *shader_program,
   2452 					    struct gl_shader *sh,
   2453 					    struct gl_program_parameter_list
   2454 					    *params)
   2455 {
   2456    add_uniform_to_shader add(shader_program, params);
   2457 
   2458    foreach_list(node, sh->ir) {
   2459       ir_variable *var = ((ir_instruction *) node)->as_variable();
   2460 
   2461       if ((var == NULL) || (var->mode != ir_var_uniform)
   2462 	  || var->uniform_block != -1 || (strncmp(var->name, "gl_", 3) == 0))
   2463 	 continue;
   2464 
   2465       add.process(var);
   2466    }
   2467 }
   2468 
   2469 void
   2470 _mesa_associate_uniform_storage(struct gl_context *ctx,
   2471 				struct gl_shader_program *shader_program,
   2472 				struct gl_program_parameter_list *params)
   2473 {
   2474    /* After adding each uniform to the parameter list, connect the storage for
   2475     * the parameter with the tracking structure used by the API for the
   2476     * uniform.
   2477     */
   2478    unsigned last_location = unsigned(~0);
   2479    for (unsigned i = 0; i < params->NumParameters; i++) {
   2480       if (params->Parameters[i].Type != PROGRAM_UNIFORM)
   2481 	 continue;
   2482 
   2483       unsigned location;
   2484       const bool found =
   2485 	 shader_program->UniformHash->get(location, params->Parameters[i].Name);
   2486       assert(found);
   2487 
   2488       if (!found)
   2489 	 continue;
   2490 
   2491       if (location != last_location) {
   2492 	 struct gl_uniform_storage *storage =
   2493 	    &shader_program->UniformStorage[location];
   2494 	 enum gl_uniform_driver_format format = uniform_native;
   2495 
   2496 	 unsigned columns = 0;
   2497 	 switch (storage->type->base_type) {
   2498 	 case GLSL_TYPE_UINT:
   2499 	    assert(ctx->Const.NativeIntegers);
   2500 	    format = uniform_native;
   2501 	    columns = 1;
   2502 	    break;
   2503 	 case GLSL_TYPE_INT:
   2504 	    format =
   2505 	       (ctx->Const.NativeIntegers) ? uniform_native : uniform_int_float;
   2506 	    columns = 1;
   2507 	    break;
   2508 	 case GLSL_TYPE_FLOAT:
   2509 	    format = uniform_native;
   2510 	    columns = storage->type->matrix_columns;
   2511 	    break;
   2512 	 case GLSL_TYPE_BOOL:
   2513 	    if (ctx->Const.NativeIntegers) {
   2514 	       format = (ctx->Const.UniformBooleanTrue == 1)
   2515 		  ? uniform_bool_int_0_1 : uniform_bool_int_0_not0;
   2516 	    } else {
   2517 	       format = uniform_bool_float;
   2518 	    }
   2519 	    columns = 1;
   2520 	    break;
   2521 	 case GLSL_TYPE_SAMPLER:
   2522 	    format = uniform_native;
   2523 	    columns = 1;
   2524 	    break;
   2525 	 default:
   2526 	    assert(!"Should not get here.");
   2527 	    break;
   2528 	 }
   2529 
   2530 	 _mesa_uniform_attach_driver_storage(storage,
   2531 					     4 * sizeof(float) * columns,
   2532 					     4 * sizeof(float),
   2533 					     format,
   2534 					     &params->ParameterValues[i]);
   2535 
   2536 	 /* After attaching the driver's storage to the uniform, propagate any
   2537 	  * data from the linker's backing store.  This will cause values from
   2538 	  * initializers in the source code to be copied over.
   2539 	  */
   2540 	 _mesa_propagate_uniforms_to_driver_storage(storage,
   2541 						    0,
   2542 						    MAX2(1, storage->array_elements));
   2543 
   2544 	 last_location = location;
   2545       }
   2546    }
   2547 }
   2548 
   2549 /*
   2550  * On a basic block basis, tracks available PROGRAM_TEMPORARY register
   2551  * channels for copy propagation and updates following instructions to
   2552  * use the original versions.
   2553  *
   2554  * The ir_to_mesa_visitor lazily produces code assuming that this pass
   2555  * will occur.  As an example, a TXP production before this pass:
   2556  *
   2557  * 0: MOV TEMP[1], INPUT[4].xyyy;
   2558  * 1: MOV TEMP[1].w, INPUT[4].wwww;
   2559  * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
   2560  *
   2561  * and after:
   2562  *
   2563  * 0: MOV TEMP[1], INPUT[4].xyyy;
   2564  * 1: MOV TEMP[1].w, INPUT[4].wwww;
   2565  * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
   2566  *
   2567  * which allows for dead code elimination on TEMP[1]'s writes.
   2568  */
   2569 void
   2570 ir_to_mesa_visitor::copy_propagate(void)
   2571 {
   2572    ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx,
   2573 						    ir_to_mesa_instruction *,
   2574 						    this->next_temp * 4);
   2575    int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   2576    int level = 0;
   2577 
   2578    foreach_iter(exec_list_iterator, iter, this->instructions) {
   2579       ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
   2580 
   2581       assert(inst->dst.file != PROGRAM_TEMPORARY
   2582 	     || inst->dst.index < this->next_temp);
   2583 
   2584       /* First, do any copy propagation possible into the src regs. */
   2585       for (int r = 0; r < 3; r++) {
   2586 	 ir_to_mesa_instruction *first = NULL;
   2587 	 bool good = true;
   2588 	 int acp_base = inst->src[r].index * 4;
   2589 
   2590 	 if (inst->src[r].file != PROGRAM_TEMPORARY ||
   2591 	     inst->src[r].reladdr)
   2592 	    continue;
   2593 
   2594 	 /* See if we can find entries in the ACP consisting of MOVs
   2595 	  * from the same src register for all the swizzled channels
   2596 	  * of this src register reference.
   2597 	  */
   2598 	 for (int i = 0; i < 4; i++) {
   2599 	    int src_chan = GET_SWZ(inst->src[r].swizzle, i);
   2600 	    ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan];
   2601 
   2602 	    if (!copy_chan) {
   2603 	       good = false;
   2604 	       break;
   2605 	    }
   2606 
   2607 	    assert(acp_level[acp_base + src_chan] <= level);
   2608 
   2609 	    if (!first) {
   2610 	       first = copy_chan;
   2611 	    } else {
   2612 	       if (first->src[0].file != copy_chan->src[0].file ||
   2613 		   first->src[0].index != copy_chan->src[0].index) {
   2614 		  good = false;
   2615 		  break;
   2616 	       }
   2617 	    }
   2618 	 }
   2619 
   2620 	 if (good) {
   2621 	    /* We've now validated that we can copy-propagate to
   2622 	     * replace this src register reference.  Do it.
   2623 	     */
   2624 	    inst->src[r].file = first->src[0].file;
   2625 	    inst->src[r].index = first->src[0].index;
   2626 
   2627 	    int swizzle = 0;
   2628 	    for (int i = 0; i < 4; i++) {
   2629 	       int src_chan = GET_SWZ(inst->src[r].swizzle, i);
   2630 	       ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan];
   2631 	       swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
   2632 			   (3 * i));
   2633 	    }
   2634 	    inst->src[r].swizzle = swizzle;
   2635 	 }
   2636       }
   2637 
   2638       switch (inst->op) {
   2639       case OPCODE_BGNLOOP:
   2640       case OPCODE_ENDLOOP:
   2641 	 /* End of a basic block, clear the ACP entirely. */
   2642 	 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
   2643 	 break;
   2644 
   2645       case OPCODE_IF:
   2646 	 ++level;
   2647 	 break;
   2648 
   2649       case OPCODE_ENDIF:
   2650       case OPCODE_ELSE:
   2651 	 /* Clear all channels written inside the block from the ACP, but
   2652 	  * leaving those that were not touched.
   2653 	  */
   2654 	 for (int r = 0; r < this->next_temp; r++) {
   2655 	    for (int c = 0; c < 4; c++) {
   2656 	       if (!acp[4 * r + c])
   2657 		  continue;
   2658 
   2659 	       if (acp_level[4 * r + c] >= level)
   2660 		  acp[4 * r + c] = NULL;
   2661 	    }
   2662 	 }
   2663 	 if (inst->op == OPCODE_ENDIF)
   2664 	    --level;
   2665 	 break;
   2666 
   2667       default:
   2668 	 /* Continuing the block, clear any written channels from
   2669 	  * the ACP.
   2670 	  */
   2671 	 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
   2672 	    /* Any temporary might be written, so no copy propagation
   2673 	     * across this instruction.
   2674 	     */
   2675 	    memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
   2676 	 } else if (inst->dst.file == PROGRAM_OUTPUT &&
   2677 		    inst->dst.reladdr) {
   2678 	    /* Any output might be written, so no copy propagation
   2679 	     * from outputs across this instruction.
   2680 	     */
   2681 	    for (int r = 0; r < this->next_temp; r++) {
   2682 	       for (int c = 0; c < 4; c++) {
   2683 		  if (!acp[4 * r + c])
   2684 		     continue;
   2685 
   2686 		  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
   2687 		     acp[4 * r + c] = NULL;
   2688 	       }
   2689 	    }
   2690 	 } else if (inst->dst.file == PROGRAM_TEMPORARY ||
   2691 		    inst->dst.file == PROGRAM_OUTPUT) {
   2692 	    /* Clear where it's used as dst. */
   2693 	    if (inst->dst.file == PROGRAM_TEMPORARY) {
   2694 	       for (int c = 0; c < 4; c++) {
   2695 		  if (inst->dst.writemask & (1 << c)) {
   2696 		     acp[4 * inst->dst.index + c] = NULL;
   2697 		  }
   2698 	       }
   2699 	    }
   2700 
   2701 	    /* Clear where it's used as src. */
   2702 	    for (int r = 0; r < this->next_temp; r++) {
   2703 	       for (int c = 0; c < 4; c++) {
   2704 		  if (!acp[4 * r + c])
   2705 		     continue;
   2706 
   2707 		  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
   2708 
   2709 		  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
   2710 		      acp[4 * r + c]->src[0].index == inst->dst.index &&
   2711 		      inst->dst.writemask & (1 << src_chan))
   2712 		  {
   2713 		     acp[4 * r + c] = NULL;
   2714 		  }
   2715 	       }
   2716 	    }
   2717 	 }
   2718 	 break;
   2719       }
   2720 
   2721       /* If this is a copy, add it to the ACP. */
   2722       if (inst->op == OPCODE_MOV &&
   2723 	  inst->dst.file == PROGRAM_TEMPORARY &&
   2724 	  !inst->dst.reladdr &&
   2725 	  !inst->saturate &&
   2726 	  !inst->src[0].reladdr &&
   2727 	  !inst->src[0].negate) {
   2728 	 for (int i = 0; i < 4; i++) {
   2729 	    if (inst->dst.writemask & (1 << i)) {
   2730 	       acp[4 * inst->dst.index + i] = inst;
   2731 	       acp_level[4 * inst->dst.index + i] = level;
   2732 	    }
   2733 	 }
   2734       }
   2735    }
   2736 
   2737    ralloc_free(acp_level);
   2738    ralloc_free(acp);
   2739 }
   2740 
   2741 
   2742 /**
   2743  * Convert a shader's GLSL IR into a Mesa gl_program.
   2744  */
   2745 static struct gl_program *
   2746 get_mesa_program(struct gl_context *ctx,
   2747                  struct gl_shader_program *shader_program,
   2748 		 struct gl_shader *shader)
   2749 {
   2750    ir_to_mesa_visitor v;
   2751    struct prog_instruction *mesa_instructions, *mesa_inst;
   2752    ir_instruction **mesa_instruction_annotation;
   2753    int i;
   2754    struct gl_program *prog;
   2755    GLenum target;
   2756    const char *target_string;
   2757    struct gl_shader_compiler_options *options =
   2758          &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
   2759 
   2760    switch (shader->Type) {
   2761    case GL_VERTEX_SHADER:
   2762       target = GL_VERTEX_PROGRAM_ARB;
   2763       target_string = "vertex";
   2764       break;
   2765    case GL_FRAGMENT_SHADER:
   2766       target = GL_FRAGMENT_PROGRAM_ARB;
   2767       target_string = "fragment";
   2768       break;
   2769    case GL_GEOMETRY_SHADER:
   2770       target = GL_GEOMETRY_PROGRAM_NV;
   2771       target_string = "geometry";
   2772       break;
   2773    default:
   2774       assert(!"should not be reached");
   2775       return NULL;
   2776    }
   2777 
   2778    validate_ir_tree(shader->ir);
   2779 
   2780    prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
   2781    if (!prog)
   2782       return NULL;
   2783    prog->Parameters = _mesa_new_parameter_list();
   2784    v.ctx = ctx;
   2785    v.prog = prog;
   2786    v.shader_program = shader_program;
   2787    v.options = options;
   2788 
   2789    _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
   2790 					       prog->Parameters);
   2791 
   2792    /* Emit Mesa IR for main(). */
   2793    visit_exec_list(shader->ir, &v);
   2794    v.emit(NULL, OPCODE_END);
   2795 
   2796    prog->NumTemporaries = v.next_temp;
   2797 
   2798    int num_instructions = 0;
   2799    foreach_iter(exec_list_iterator, iter, v.instructions) {
   2800       num_instructions++;
   2801    }
   2802 
   2803    mesa_instructions =
   2804       (struct prog_instruction *)calloc(num_instructions,
   2805 					sizeof(*mesa_instructions));
   2806    mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *,
   2807 					      num_instructions);
   2808 
   2809    v.copy_propagate();
   2810 
   2811    /* Convert ir_mesa_instructions into prog_instructions.
   2812     */
   2813    mesa_inst = mesa_instructions;
   2814    i = 0;
   2815    foreach_iter(exec_list_iterator, iter, v.instructions) {
   2816       const ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
   2817 
   2818       mesa_inst->Opcode = inst->op;
   2819       mesa_inst->CondUpdate = inst->cond_update;
   2820       if (inst->saturate)
   2821 	 mesa_inst->SaturateMode = SATURATE_ZERO_ONE;
   2822       mesa_inst->DstReg.File = inst->dst.file;
   2823       mesa_inst->DstReg.Index = inst->dst.index;
   2824       mesa_inst->DstReg.CondMask = inst->dst.cond_mask;
   2825       mesa_inst->DstReg.WriteMask = inst->dst.writemask;
   2826       mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
   2827       mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]);
   2828       mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]);
   2829       mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]);
   2830       mesa_inst->TexSrcUnit = inst->sampler;
   2831       mesa_inst->TexSrcTarget = inst->tex_target;
   2832       mesa_inst->TexShadow = inst->tex_shadow;
   2833       mesa_instruction_annotation[i] = inst->ir;
   2834 
   2835       /* Set IndirectRegisterFiles. */
   2836       if (mesa_inst->DstReg.RelAddr)
   2837          prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File;
   2838 
   2839       /* Update program's bitmask of indirectly accessed register files */
   2840       for (unsigned src = 0; src < 3; src++)
   2841          if (mesa_inst->SrcReg[src].RelAddr)
   2842             prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
   2843 
   2844       switch (mesa_inst->Opcode) {
   2845       case OPCODE_IF:
   2846 	 if (options->MaxIfDepth == 0) {
   2847 	    linker_warning(shader_program,
   2848 			   "Couldn't flatten if-statement.  "
   2849 			   "This will likely result in software "
   2850 			   "rasterization.\n");
   2851 	 }
   2852 	 break;
   2853       case OPCODE_BGNLOOP:
   2854 	 if (options->EmitNoLoops) {
   2855 	    linker_warning(shader_program,
   2856 			   "Couldn't unroll loop.  "
   2857 			   "This will likely result in software "
   2858 			   "rasterization.\n");
   2859 	 }
   2860 	 break;
   2861       case OPCODE_CONT:
   2862 	 if (options->EmitNoCont) {
   2863 	    linker_warning(shader_program,
   2864 			   "Couldn't lower continue-statement.  "
   2865 			   "This will likely result in software "
   2866 			   "rasterization.\n");
   2867 	 }
   2868 	 break;
   2869       case OPCODE_ARL:
   2870 	 prog->NumAddressRegs = 1;
   2871 	 break;
   2872       default:
   2873 	 break;
   2874       }
   2875 
   2876       mesa_inst++;
   2877       i++;
   2878 
   2879       if (!shader_program->LinkStatus)
   2880          break;
   2881    }
   2882 
   2883    if (!shader_program->LinkStatus) {
   2884       goto fail_exit;
   2885    }
   2886 
   2887    set_branchtargets(&v, mesa_instructions, num_instructions);
   2888 
   2889    if (ctx->Shader.Flags & GLSL_DUMP) {
   2890       printf("\n");
   2891       printf("GLSL IR for linked %s program %d:\n", target_string,
   2892 	     shader_program->Name);
   2893       _mesa_print_ir(shader->ir, NULL);
   2894       printf("\n");
   2895       printf("\n");
   2896       printf("Mesa IR for linked %s program %d:\n", target_string,
   2897 	     shader_program->Name);
   2898       print_program(mesa_instructions, mesa_instruction_annotation,
   2899 		    num_instructions);
   2900    }
   2901 
   2902    prog->Instructions = mesa_instructions;
   2903    prog->NumInstructions = num_instructions;
   2904 
   2905    /* Setting this to NULL prevents a possible double free in the fail_exit
   2906     * path (far below).
   2907     */
   2908    mesa_instructions = NULL;
   2909 
   2910    do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER);
   2911 
   2912    prog->SamplersUsed = shader->active_samplers;
   2913    prog->ShadowSamplers = shader->shadow_samplers;
   2914    _mesa_update_shader_textures_used(shader_program, prog);
   2915 
   2916    /* Set the gl_FragDepth layout. */
   2917    if (target == GL_FRAGMENT_PROGRAM_ARB) {
   2918       struct gl_fragment_program *fp = (struct gl_fragment_program *)prog;
   2919       fp->FragDepthLayout = shader_program->FragDepthLayout;
   2920    }
   2921 
   2922    _mesa_reference_program(ctx, &shader->Program, prog);
   2923 
   2924    if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
   2925       _mesa_optimize_program(ctx, prog);
   2926    }
   2927 
   2928    /* This has to be done last.  Any operation that can cause
   2929     * prog->ParameterValues to get reallocated (e.g., anything that adds a
   2930     * program constant) has to happen before creating this linkage.
   2931     */
   2932    _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
   2933    if (!shader_program->LinkStatus) {
   2934       goto fail_exit;
   2935    }
   2936 
   2937    return prog;
   2938 
   2939 fail_exit:
   2940    free(mesa_instructions);
   2941    _mesa_reference_program(ctx, &shader->Program, NULL);
   2942    return NULL;
   2943 }
   2944 
   2945 extern "C" {
   2946 
   2947 /**
   2948  * Link a shader.
   2949  * Called via ctx->Driver.LinkShader()
   2950  * This actually involves converting GLSL IR into Mesa gl_programs with
   2951  * code lowering and other optimizations.
   2952  */
   2953 GLboolean
   2954 _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
   2955 {
   2956    assert(prog->LinkStatus);
   2957 
   2958    for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
   2959       if (prog->_LinkedShaders[i] == NULL)
   2960 	 continue;
   2961 
   2962       bool progress;
   2963       exec_list *ir = prog->_LinkedShaders[i]->ir;
   2964       const struct gl_shader_compiler_options *options =
   2965             &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
   2966 
   2967       do {
   2968 	 progress = false;
   2969 
   2970 	 /* Lowering */
   2971 	 do_mat_op_to_vec(ir);
   2972 	 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
   2973 				 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
   2974 				 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
   2975 
   2976 	 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
   2977 
   2978 	 progress = do_common_optimization(ir, true, true,
   2979 					   options->MaxUnrollIterations)
   2980 	   || progress;
   2981 
   2982 	 progress = lower_quadop_vector(ir, true) || progress;
   2983 
   2984 	 if (options->MaxIfDepth == 0)
   2985 	    progress = lower_discard(ir) || progress;
   2986 
   2987 	 progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
   2988 
   2989 	 if (options->EmitNoNoise)
   2990 	    progress = lower_noise(ir) || progress;
   2991 
   2992 	 /* If there are forms of indirect addressing that the driver
   2993 	  * cannot handle, perform the lowering pass.
   2994 	  */
   2995 	 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
   2996 	     || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
   2997 	   progress =
   2998 	     lower_variable_index_to_cond_assign(ir,
   2999 						 options->EmitNoIndirectInput,
   3000 						 options->EmitNoIndirectOutput,
   3001 						 options->EmitNoIndirectTemp,
   3002 						 options->EmitNoIndirectUniform)
   3003 	     || progress;
   3004 
   3005 	 progress = do_vec_index_to_cond_assign(ir) || progress;
   3006       } while (progress);
   3007 
   3008       validate_ir_tree(ir);
   3009    }
   3010 
   3011    for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
   3012       struct gl_program *linked_prog;
   3013 
   3014       if (prog->_LinkedShaders[i] == NULL)
   3015 	 continue;
   3016 
   3017       linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
   3018 
   3019       if (linked_prog) {
   3020 	 static const GLenum targets[] = {
   3021 	    GL_VERTEX_PROGRAM_ARB,
   3022 	    GL_FRAGMENT_PROGRAM_ARB,
   3023 	    GL_GEOMETRY_PROGRAM_NV
   3024 	 };
   3025 
   3026 	 if (i == MESA_SHADER_VERTEX) {
   3027             ((struct gl_vertex_program *)linked_prog)->UsesClipDistance
   3028                = prog->Vert.UsesClipDistance;
   3029 	 }
   3030 
   3031 	 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
   3032 				 linked_prog);
   3033          if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) {
   3034             return GL_FALSE;
   3035          }
   3036       }
   3037 
   3038       _mesa_reference_program(ctx, &linked_prog, NULL);
   3039    }
   3040 
   3041    return prog->LinkStatus;
   3042 }
   3043 
   3044 
   3045 /**
   3046  * Compile a GLSL shader.  Called via glCompileShader().
   3047  */
   3048 void
   3049 _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader)
   3050 {
   3051    struct _mesa_glsl_parse_state *state =
   3052       new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader);
   3053 
   3054    const char *source = shader->Source;
   3055    /* Check if the user called glCompileShader without first calling
   3056     * glShaderSource.  This should fail to compile, but not raise a GL_ERROR.
   3057     */
   3058    if (source == NULL) {
   3059       shader->CompileStatus = GL_FALSE;
   3060       return;
   3061    }
   3062 
   3063    state->error = glcpp_preprocess(state, &source, &state->info_log,
   3064 			     &ctx->Extensions, ctx->API);
   3065 
   3066    if (ctx->Shader.Flags & GLSL_DUMP) {
   3067       printf("GLSL source for %s shader %d:\n",
   3068 	     _mesa_glsl_shader_target_name(state->target), shader->Name);
   3069       printf("%s\n", shader->Source);
   3070    }
   3071 
   3072    if (!state->error) {
   3073      _mesa_glsl_lexer_ctor(state, source);
   3074      _mesa_glsl_parse(state);
   3075      _mesa_glsl_lexer_dtor(state);
   3076    }
   3077 
   3078    ralloc_free(shader->ir);
   3079    shader->ir = new(shader) exec_list;
   3080    if (!state->error && !state->translation_unit.is_empty())
   3081       _mesa_ast_to_hir(shader->ir, state);
   3082 
   3083    if (!state->error && !shader->ir->is_empty()) {
   3084       validate_ir_tree(shader->ir);
   3085 
   3086       /* Do some optimization at compile time to reduce shader IR size
   3087        * and reduce later work if the same shader is linked multiple times
   3088        */
   3089       while (do_common_optimization(shader->ir, false, false, 32))
   3090 	 ;
   3091 
   3092       validate_ir_tree(shader->ir);
   3093    }
   3094 
   3095    shader->symbols = state->symbols;
   3096 
   3097    shader->CompileStatus = !state->error;
   3098    shader->InfoLog = state->info_log;
   3099    shader->Version = state->language_version;
   3100    memcpy(shader->builtins_to_link, state->builtins_to_link,
   3101 	  sizeof(shader->builtins_to_link[0]) * state->num_builtins_to_link);
   3102    shader->num_builtins_to_link = state->num_builtins_to_link;
   3103 
   3104    if (ctx->Shader.Flags & GLSL_LOG) {
   3105       _mesa_write_shader_to_file(shader);
   3106    }
   3107 
   3108    if (ctx->Shader.Flags & GLSL_DUMP) {
   3109       if (shader->CompileStatus) {
   3110 	 printf("GLSL IR for shader %d:\n", shader->Name);
   3111 	 _mesa_print_ir(shader->ir, NULL);
   3112 	 printf("\n\n");
   3113       } else {
   3114 	 printf("GLSL shader %d failed to compile.\n", shader->Name);
   3115       }
   3116       if (shader->InfoLog && shader->InfoLog[0] != 0) {
   3117 	 printf("GLSL shader %d info log:\n", shader->Name);
   3118 	 printf("%s\n", shader->InfoLog);
   3119       }
   3120    }
   3121 
   3122    if (shader->UniformBlocks)
   3123       ralloc_free(shader->UniformBlocks);
   3124    shader->NumUniformBlocks = state->num_uniform_blocks;
   3125    shader->UniformBlocks = state->uniform_blocks;
   3126    ralloc_steal(shader, shader->UniformBlocks);
   3127 
   3128    /* Retain any live IR, but trash the rest. */
   3129    reparent_ir(shader->ir, shader->ir);
   3130 
   3131    ralloc_free(state);
   3132 }
   3133 
   3134 
   3135 /**
   3136  * Link a GLSL shader program.  Called via glLinkProgram().
   3137  */
   3138 void
   3139 _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
   3140 {
   3141    unsigned int i;
   3142 
   3143    _mesa_clear_shader_program_data(ctx, prog);
   3144 
   3145    prog->LinkStatus = GL_TRUE;
   3146 
   3147    for (i = 0; i < prog->NumShaders; i++) {
   3148       if (!prog->Shaders[i]->CompileStatus) {
   3149 	 linker_error(prog, "linking with uncompiled shader");
   3150 	 prog->LinkStatus = GL_FALSE;
   3151       }
   3152    }
   3153 
   3154    if (prog->LinkStatus) {
   3155       link_shaders(ctx, prog);
   3156    }
   3157 
   3158    if (prog->LinkStatus) {
   3159       if (!ctx->Driver.LinkShader(ctx, prog)) {
   3160 	 prog->LinkStatus = GL_FALSE;
   3161       }
   3162    }
   3163 
   3164    if (ctx->Shader.Flags & GLSL_DUMP) {
   3165       if (!prog->LinkStatus) {
   3166 	 printf("GLSL shader program %d failed to link\n", prog->Name);
   3167       }
   3168 
   3169       if (prog->InfoLog && prog->InfoLog[0] != 0) {
   3170 	 printf("GLSL shader program %d info log:\n", prog->Name);
   3171 	 printf("%s\n", prog->InfoLog);
   3172       }
   3173    }
   3174 }
   3175 
   3176 } /* extern "C" */
   3177