Home | History | Annotate | Download | only in program
      1 /*
      2  * Mesa 3-D graphics library
      3  *
      4  * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included
     14  * in all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
     20  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 
     25 
     26 #include "main/glheader.h"
     27 #include "main/context.h"
     28 #include "main/macros.h"
     29 #include "program.h"
     30 #include "prog_instruction.h"
     31 #include "prog_optimize.h"
     32 #include "prog_print.h"
     33 
     34 
/* NOTE(review): not referenced in this part of the file; presumably the
 * maximum depth of the loop stack used by the live-interval code further
 * down -- confirm against its users.
 */
#define MAX_LOOP_NESTING 50
/* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
 * register allocate many temporary values into that small number of
 * temps.  So allow large temporary indices coming into the register
 * allocator.
 */
#define REG_ALLOCATE_MAX_PROGRAM_TEMPS	((1 << INST_INDEX_BITS) - 1)

/* When true, the optimization passes in this file print progress
 * messages with printf().
 */
static GLboolean dbg = GL_FALSE;

/* Passed as the dst_mask argument of get_src_arg_mask() when the
 * destination write mask should not be narrowed any further (all four
 * channel bits set).
 */
#define NO_MASK 0xf
     46 
     47 /**
     48  * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
     49  * are read from the given src in this instruction, We also provide
     50  * one optional masks which may mask other components in the dst
     51  * register
     52  */
     53 static GLuint
     54 get_src_arg_mask(const struct prog_instruction *inst,
     55                  GLuint arg, GLuint dst_mask)
     56 {
     57    GLuint read_mask, channel_mask;
     58    GLuint comp;
     59 
     60    assert(arg < _mesa_num_inst_src_regs(inst->Opcode));
     61 
     62    /* Form the dst register, find the written channels */
     63    switch (inst->Opcode) {
     64    case OPCODE_MOV:
     65    case OPCODE_MIN:
     66    case OPCODE_MAX:
     67    case OPCODE_ABS:
     68    case OPCODE_ADD:
     69    case OPCODE_MAD:
     70    case OPCODE_MUL:
     71    case OPCODE_SUB:
     72    case OPCODE_CMP:
     73    case OPCODE_FLR:
     74    case OPCODE_FRC:
     75    case OPCODE_LRP:
     76    case OPCODE_SGE:
     77    case OPCODE_SLT:
     78    case OPCODE_SSG:
     79       channel_mask = inst->DstReg.WriteMask & dst_mask;
     80       break;
     81    case OPCODE_RCP:
     82    case OPCODE_SIN:
     83    case OPCODE_COS:
     84    case OPCODE_RSQ:
     85    case OPCODE_POW:
     86    case OPCODE_EX2:
     87    case OPCODE_LOG:
     88       channel_mask = WRITEMASK_X;
     89       break;
     90    case OPCODE_DP2:
     91       channel_mask = WRITEMASK_XY;
     92       break;
     93    case OPCODE_DP3:
     94    case OPCODE_XPD:
     95       channel_mask = WRITEMASK_XYZ;
     96       break;
     97    default:
     98       channel_mask = WRITEMASK_XYZW;
     99       break;
    100    }
    101 
    102    /* Now, given the src swizzle and the written channels, find which
    103     * components are actually read
    104     */
    105    read_mask = 0x0;
    106    for (comp = 0; comp < 4; ++comp) {
    107       const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
    108       if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
    109          read_mask |= 1 << coord;
    110    }
    111 
    112    return read_mask;
    113 }
    114 
    115 
    116 /**
    117  * For a MOV instruction, compute a write mask when src register also has
    118  * a mask
    119  */
    120 static GLuint
    121 get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
    122 {
    123    const GLuint mask = mov->DstReg.WriteMask;
    124    GLuint comp;
    125    GLuint updated_mask = 0x0;
    126 
    127    assert(mov->Opcode == OPCODE_MOV);
    128 
    129    for (comp = 0; comp < 4; ++comp) {
    130       GLuint src_comp;
    131       if ((mask & (1 << comp)) == 0)
    132          continue;
    133       src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
    134       if ((src_mask & (1 << src_comp)) == 0)
    135          continue;
    136       updated_mask |= 1 << comp;
    137    }
    138 
    139    return updated_mask;
    140 }
    141 
    142 
    143 /**
    144  * Ensure that the swizzle is regular.  That is, all of the swizzle
    145  * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
    146  */
    147 static GLboolean
    148 is_swizzle_regular(GLuint swz)
    149 {
    150    return GET_SWZ(swz,0) <= SWIZZLE_W &&
    151           GET_SWZ(swz,1) <= SWIZZLE_W &&
    152           GET_SWZ(swz,2) <= SWIZZLE_W &&
    153           GET_SWZ(swz,3) <= SWIZZLE_W;
    154 }
    155 
    156 
    157 /**
    158  * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
    159  * \return number of instructions removed
    160  */
    161 static GLuint
    162 remove_instructions(struct gl_program *prog, const GLboolean *removeFlags,
    163                     void *mem_ctx)
    164 {
    165    GLint i, removeEnd = 0, removeCount = 0;
    166    GLuint totalRemoved = 0;
    167 
    168    /* go backward */
    169    for (i = prog->arb.NumInstructions - 1; i >= 0; i--) {
    170       if (removeFlags[i]) {
    171          totalRemoved++;
    172          if (removeCount == 0) {
    173             /* begin a run of instructions to remove */
    174             removeEnd = i;
    175             removeCount = 1;
    176          }
    177          else {
    178             /* extend the run of instructions to remove */
    179             removeCount++;
    180          }
    181       }
    182       else {
    183          /* don't remove this instruction, but check if the preceeding
    184           * instructions are to be removed.
    185           */
    186          if (removeCount > 0) {
    187             GLint removeStart = removeEnd - removeCount + 1;
    188             _mesa_delete_instructions(prog, removeStart, removeCount, mem_ctx);
    189             removeStart = removeCount = 0; /* reset removal info */
    190          }
    191       }
    192    }
    193    /* Finish removing if the first instruction was to be removed. */
    194    if (removeCount > 0) {
    195       GLint removeStart = removeEnd - removeCount + 1;
    196       _mesa_delete_instructions(prog, removeStart, removeCount, mem_ctx);
    197    }
    198    return totalRemoved;
    199 }
    200 
    201 
    202 /**
    203  * Remap register indexes according to map.
    204  * \param prog  the program to search/replace
    205  * \param file  the type of register file to search/replace
    206  * \param map  maps old register indexes to new indexes
    207  */
    208 static void
    209 replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
    210 {
    211    GLuint i;
    212 
    213    for (i = 0; i < prog->arb.NumInstructions; i++) {
    214       struct prog_instruction *inst = prog->arb.Instructions + i;
    215       const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
    216       GLuint j;
    217       for (j = 0; j < numSrc; j++) {
    218          if (inst->SrcReg[j].File == file) {
    219             GLuint index = inst->SrcReg[j].Index;
    220             assert(map[index] >= 0);
    221             inst->SrcReg[j].Index = map[index];
    222          }
    223       }
    224       if (inst->DstReg.File == file) {
    225          const GLuint index = inst->DstReg.Index;
    226          assert(map[index] >= 0);
    227          inst->DstReg.Index = map[index];
    228       }
    229    }
    230 }
    231 
    232 
    233 /**
    234  * Remove dead instructions from the given program.
    235  * This is very primitive for now.  Basically look for temp registers
    236  * that are written to but never read.  Remove any instructions that
    237  * write to such registers.  Be careful with condition code setters.
    238  */
    239 static GLboolean
    240 _mesa_remove_dead_code_global(struct gl_program *prog, void *mem_ctx)
    241 {
    242    GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
    243    GLboolean *removeInst; /* per-instruction removal flag */
    244    GLuint i, rem = 0, comp;
    245 
    246    memset(tempRead, 0, sizeof(tempRead));
    247 
    248    if (dbg) {
    249       printf("Optimize: Begin dead code removal\n");
    250       /*_mesa_print_program(prog);*/
    251    }
    252 
    253    removeInst =
    254       calloc(prog->arb.NumInstructions, sizeof(GLboolean));
    255 
    256    /* Determine which temps are read and written */
    257    for (i = 0; i < prog->arb.NumInstructions; i++) {
    258       const struct prog_instruction *inst = prog->arb.Instructions + i;
    259       const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
    260       GLuint j;
    261 
    262       /* check src regs */
    263       for (j = 0; j < numSrc; j++) {
    264          if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
    265             const GLuint index = inst->SrcReg[j].Index;
    266             GLuint read_mask;
    267             assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
    268 	    read_mask = get_src_arg_mask(inst, j, NO_MASK);
    269 
    270             if (inst->SrcReg[j].RelAddr) {
    271                if (dbg)
    272                   printf("abort remove dead code (indirect temp)\n");
    273                goto done;
    274             }
    275 
    276 	    for (comp = 0; comp < 4; comp++) {
    277 	       const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
    278                if (swz <= SWIZZLE_W) {
    279                   if ((read_mask & (1 << swz)) == 0)
    280                      continue;
    281                   tempRead[index][swz] = GL_TRUE;
    282                }
    283 	    }
    284          }
    285       }
    286 
    287       /* check dst reg */
    288       if (inst->DstReg.File == PROGRAM_TEMPORARY) {
    289          assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
    290 
    291          if (inst->DstReg.RelAddr) {
    292             if (dbg)
    293                printf("abort remove dead code (indirect temp)\n");
    294             goto done;
    295          }
    296       }
    297    }
    298 
    299    /* find instructions that write to dead registers, flag for removal */
    300    for (i = 0; i < prog->arb.NumInstructions; i++) {
    301       struct prog_instruction *inst = prog->arb.Instructions + i;
    302       const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
    303 
    304       if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
    305          GLint chan, index = inst->DstReg.Index;
    306 
    307 	 for (chan = 0; chan < 4; chan++) {
    308 	    if (!tempRead[index][chan] &&
    309 		inst->DstReg.WriteMask & (1 << chan)) {
    310 	       if (dbg) {
    311 		  printf("Remove writemask on %u.%c\n", i,
    312 			       chan == 3 ? 'w' : 'x' + chan);
    313 	       }
    314 	       inst->DstReg.WriteMask &= ~(1 << chan);
    315 	       rem++;
    316 	    }
    317 	 }
    318 
    319 	 if (inst->DstReg.WriteMask == 0) {
    320 	    /* If we cleared all writes, the instruction can be removed. */
    321 	    if (dbg)
    322 	       printf("Remove instruction %u: \n", i);
    323 	    removeInst[i] = GL_TRUE;
    324 	 }
    325       }
    326    }
    327 
    328    /* now remove the instructions which aren't needed */
    329    rem = remove_instructions(prog, removeInst, mem_ctx);
    330 
    331    if (dbg) {
    332       printf("Optimize: End dead code removal.\n");
    333       printf("  %u channel writes removed\n", rem);
    334       printf("  %u instructions removed\n", rem);
    335       /*_mesa_print_program(prog);*/
    336    }
    337 
    338 done:
    339    free(removeInst);
    340    return rem != 0;
    341 }
    342 
    343 
/**
 * Outcome of scanning forward for the next use of a temporary register's
 * channels (returned by find_next_use()).
 */
enum inst_use
{
   /** A later instruction reads one of the tracked channels, or uses a
    *  relatively-addressed source operand. */
   READ,
   /** Every tracked channel was overwritten before being read. */
   WRITE,
   /** A flow-control opcode was reached first; the scan stopped there. */
   FLOW,
   /** OPCODE_END or the end of the instruction list was reached. */
   END
};
    351 
    352 
    353 /**
    354  * Scan forward in program from 'start' for the next occurances of TEMP[index].
    355  * We look if an instruction reads the component given by the masks and if they
    356  * are overwritten.
    357  * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
    358  * that we can't look further.
    359  */
    360 static enum inst_use
    361 find_next_use(const struct gl_program *prog,
    362               GLuint start,
    363               GLuint index,
    364               GLuint mask)
    365 {
    366    GLuint i;
    367 
    368    for (i = start; i < prog->arb.NumInstructions; i++) {
    369       const struct prog_instruction *inst = prog->arb.Instructions + i;
    370       switch (inst->Opcode) {
    371       case OPCODE_BGNLOOP:
    372       case OPCODE_BGNSUB:
    373       case OPCODE_CAL:
    374       case OPCODE_CONT:
    375       case OPCODE_IF:
    376       case OPCODE_ELSE:
    377       case OPCODE_ENDIF:
    378       case OPCODE_ENDLOOP:
    379       case OPCODE_ENDSUB:
    380       case OPCODE_RET:
    381          return FLOW;
    382       case OPCODE_END:
    383          return END;
    384       default:
    385          {
    386             const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
    387             GLuint j;
    388             for (j = 0; j < numSrc; j++) {
    389                if (inst->SrcReg[j].RelAddr ||
    390                    (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
    391                    inst->SrcReg[j].Index == (GLint)index &&
    392                    (get_src_arg_mask(inst,j,NO_MASK) & mask)))
    393                   return READ;
    394             }
    395             if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
    396                 inst->DstReg.File == PROGRAM_TEMPORARY &&
    397                 inst->DstReg.Index == index) {
    398                mask &= ~inst->DstReg.WriteMask;
    399                if (mask == 0)
    400                   return WRITE;
    401             }
    402          }
    403       }
    404    }
    405    return END;
    406 }
    407 
    408 
    409 /**
    410  * Is the given instruction opcode a flow-control opcode?
    411  * XXX maybe move this into prog_instruction.[ch]
    412  */
    413 static GLboolean
    414 _mesa_is_flow_control_opcode(enum prog_opcode opcode)
    415 {
    416    switch (opcode) {
    417    case OPCODE_BGNLOOP:
    418    case OPCODE_BGNSUB:
    419    case OPCODE_CAL:
    420    case OPCODE_CONT:
    421    case OPCODE_IF:
    422    case OPCODE_ELSE:
    423    case OPCODE_END:
    424    case OPCODE_ENDIF:
    425    case OPCODE_ENDLOOP:
    426    case OPCODE_ENDSUB:
    427    case OPCODE_RET:
    428       return GL_TRUE;
    429    default:
    430       return GL_FALSE;
    431    }
    432 }
    433 
    434 
    435 /**
    436  * Test if the given instruction is a simple MOV (no conditional updating,
    437  * not relative addressing, no negation/abs, etc).
    438  */
    439 static GLboolean
    440 can_downward_mov_be_modifed(const struct prog_instruction *mov)
    441 {
    442    return
    443       mov->Opcode == OPCODE_MOV &&
    444       mov->SrcReg[0].RelAddr == 0 &&
    445       mov->SrcReg[0].Negate == 0 &&
    446       mov->DstReg.RelAddr == 0;
    447 }
    448 
    449 
    450 static GLboolean
    451 can_upward_mov_be_modifed(const struct prog_instruction *mov)
    452 {
    453    return
    454       can_downward_mov_be_modifed(mov) &&
    455       mov->DstReg.File == PROGRAM_TEMPORARY &&
    456       !mov->Saturate;
    457 }
    458 
    459 
    460 /**
    461  * Try to remove use of extraneous MOV instructions, to free them up for dead
    462  * code removal.
    463  */
    464 static void
    465 _mesa_remove_extra_move_use(struct gl_program *prog)
    466 {
    467    GLuint i, j;
    468 
    469    if (dbg) {
    470       printf("Optimize: Begin remove extra move use\n");
    471       _mesa_print_program(prog);
    472    }
    473 
    474    /*
    475     * Look for sequences such as this:
    476     *    MOV tmpX, arg0;
    477     *    ...
    478     *    FOO tmpY, tmpX, arg1;
    479     * and convert into:
    480     *    MOV tmpX, arg0;
    481     *    ...
    482     *    FOO tmpY, arg0, arg1;
    483     */
    484 
    485    for (i = 0; i + 1 < prog->arb.NumInstructions; i++) {
    486       const struct prog_instruction *mov = prog->arb.Instructions + i;
    487       GLuint dst_mask, src_mask;
    488       if (can_upward_mov_be_modifed(mov) == GL_FALSE)
    489          continue;
    490 
    491       /* Scanning the code, we maintain the components which are still active in
    492        * these two masks
    493        */
    494       dst_mask = mov->DstReg.WriteMask;
    495       src_mask = get_src_arg_mask(mov, 0, NO_MASK);
    496 
    497       /* Walk through remaining instructions until the or src reg gets
    498        * rewritten or we get into some flow-control, eliminating the use of
    499        * this MOV.
    500        */
    501       for (j = i + 1; j < prog->arb.NumInstructions; j++) {
    502          struct prog_instruction *inst2 = prog->arb.Instructions + j;
    503          GLuint arg;
    504 
    505 	 if (_mesa_is_flow_control_opcode(inst2->Opcode))
    506 	     break;
    507 
    508 	 /* First rewrite this instruction's args if appropriate. */
    509 	 for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
    510 	    GLuint comp, read_mask;
    511 
    512 	    if (inst2->SrcReg[arg].File != mov->DstReg.File ||
    513 		inst2->SrcReg[arg].Index != mov->DstReg.Index ||
    514 		inst2->SrcReg[arg].RelAddr)
    515 	       continue;
    516             read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
    517 
    518 	    /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
    519              * components read still come from the mov instructions
    520              */
    521             if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
    522                (read_mask & dst_mask) == read_mask) {
    523                for (comp = 0; comp < 4; comp++) {
    524                   const GLuint inst2_swz =
    525                      GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
    526                   const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
    527                   inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
    528                   inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
    529                   inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
    530                                                   inst2_swz) & 0x1) << comp);
    531                }
    532                inst2->SrcReg[arg].File = mov->SrcReg[0].File;
    533                inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
    534             }
    535 	 }
    536 
    537 	 /* The source of MOV is written. This potentially deactivates some
    538           * components from the src and dst of the MOV instruction
    539           */
    540 	 if (inst2->DstReg.File == mov->DstReg.File &&
    541 	     (inst2->DstReg.RelAddr ||
    542 	      inst2->DstReg.Index == mov->DstReg.Index)) {
    543             dst_mask &= ~inst2->DstReg.WriteMask;
    544             src_mask = get_src_arg_mask(mov, 0, dst_mask);
    545          }
    546 
    547          /* Idem when the destination of mov is written */
    548 	 if (inst2->DstReg.File == mov->SrcReg[0].File &&
    549 	     (inst2->DstReg.RelAddr ||
    550 	      inst2->DstReg.Index == mov->SrcReg[0].Index)) {
    551             src_mask &= ~inst2->DstReg.WriteMask;
    552             dst_mask &= get_dst_mask_for_mov(mov, src_mask);
    553          }
    554          if (dst_mask == 0)
    555             break;
    556       }
    557    }
    558 
    559    if (dbg) {
    560       printf("Optimize: End remove extra move use.\n");
    561       /*_mesa_print_program(prog);*/
    562    }
    563 }
    564 
    565 
    566 /**
    567  * Complements dead_code_global. Try to remove code in block of code by
    568  * carefully monitoring the swizzles. Both functions should be merged into one
    569  * with a proper control flow graph
    570  */
    571 static GLboolean
    572 _mesa_remove_dead_code_local(struct gl_program *prog, void *mem_ctx)
    573 {
    574    GLboolean *removeInst;
    575    GLuint i, arg, rem = 0;
    576 
    577    removeInst =
    578       calloc(prog->arb.NumInstructions, sizeof(GLboolean));
    579 
    580    for (i = 0; i < prog->arb.NumInstructions; i++) {
    581       const struct prog_instruction *inst = prog->arb.Instructions + i;
    582       const GLuint index = inst->DstReg.Index;
    583       const GLuint mask = inst->DstReg.WriteMask;
    584       enum inst_use use;
    585 
    586       /* We must deactivate the pass as soon as some indirection is used */
    587       if (inst->DstReg.RelAddr)
    588          goto done;
    589       for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
    590          if (inst->SrcReg[arg].RelAddr)
    591             goto done;
    592 
    593       if (_mesa_is_flow_control_opcode(inst->Opcode) ||
    594           _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
    595           inst->DstReg.File != PROGRAM_TEMPORARY ||
    596           inst->DstReg.RelAddr)
    597          continue;
    598 
    599       use = find_next_use(prog, i+1, index, mask);
    600       if (use == WRITE || use == END)
    601          removeInst[i] = GL_TRUE;
    602    }
    603 
    604    rem = remove_instructions(prog, removeInst, mem_ctx);
    605 
    606 done:
    607    free(removeInst);
    608    return rem != 0;
    609 }
    610 
    611 
    612 /**
    613  * Try to inject the destination of mov as the destination of inst and recompute
    614  * the swizzles operators for the sources of inst if required. Return GL_TRUE
    615  * of the substitution was possible, GL_FALSE otherwise
    616  */
    617 static GLboolean
    618 _mesa_merge_mov_into_inst(struct prog_instruction *inst,
    619                           const struct prog_instruction *mov)
    620 {
    621    /* Indirection table which associates destination and source components for
    622     * the mov instruction
    623     */
    624    const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
    625 
    626    /* Some components are not written by inst. We cannot remove the mov */
    627    if (mask != (inst->DstReg.WriteMask & mask))
    628       return GL_FALSE;
    629 
    630    inst->Saturate |= mov->Saturate;
    631 
    632    /* Depending on the instruction, we may need to recompute the swizzles.
    633     * Also, some other instructions (like TEX) are not linear. We will only
    634     * consider completely active sources and destinations
    635     */
    636    switch (inst->Opcode) {
    637 
    638    /* Carstesian instructions: we compute the swizzle */
    639    case OPCODE_MOV:
    640    case OPCODE_MIN:
    641    case OPCODE_MAX:
    642    case OPCODE_ABS:
    643    case OPCODE_ADD:
    644    case OPCODE_MAD:
    645    case OPCODE_MUL:
    646    case OPCODE_SUB:
    647    {
    648       GLuint dst_to_src_comp[4] = {0,0,0,0};
    649       GLuint dst_comp, arg;
    650       for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
    651          if (mov->DstReg.WriteMask & (1 << dst_comp)) {
    652             const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
    653             assert(src_comp < 4);
    654             dst_to_src_comp[dst_comp] = src_comp;
    655          }
    656       }
    657 
    658       /* Patch each source of the instruction */
    659       for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
    660          const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
    661          inst->SrcReg[arg].Swizzle = 0;
    662 
    663          /* Reset each active component of the swizzle */
    664          for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
    665             GLuint src_comp, arg_comp;
    666             if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
    667                continue;
    668             src_comp = dst_to_src_comp[dst_comp];
    669             assert(src_comp < 4);
    670             arg_comp = GET_SWZ(arg_swz, src_comp);
    671             assert(arg_comp < 4);
    672             inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
    673          }
    674       }
    675       inst->DstReg = mov->DstReg;
    676       return GL_TRUE;
    677    }
    678 
    679    /* Dot products and scalar instructions: we only change the destination */
    680    case OPCODE_RCP:
    681    case OPCODE_SIN:
    682    case OPCODE_COS:
    683    case OPCODE_RSQ:
    684    case OPCODE_POW:
    685    case OPCODE_EX2:
    686    case OPCODE_LOG:
    687    case OPCODE_DP2:
    688    case OPCODE_DP3:
    689    case OPCODE_DP4:
    690       inst->DstReg = mov->DstReg;
    691       return GL_TRUE;
    692 
    693    /* All other instructions require fully active components with no swizzle */
    694    default:
    695       if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
    696           inst->DstReg.WriteMask != WRITEMASK_XYZW)
    697          return GL_FALSE;
    698       inst->DstReg = mov->DstReg;
    699       return GL_TRUE;
    700    }
    701 }
    702 
    703 
    704 /**
    705  * Try to remove extraneous MOV instructions from the given program.
    706  */
    707 static GLboolean
    708 _mesa_remove_extra_moves(struct gl_program *prog, void *mem_ctx)
    709 {
    710    GLboolean *removeInst; /* per-instruction removal flag */
    711    GLuint i, rem = 0, nesting = 0;
    712 
    713    if (dbg) {
    714       printf("Optimize: Begin remove extra moves\n");
    715       _mesa_print_program(prog);
    716    }
    717 
    718    removeInst =
    719       calloc(prog->arb.NumInstructions, sizeof(GLboolean));
    720 
    721    /*
    722     * Look for sequences such as this:
    723     *    FOO tmpX, arg0, arg1;
    724     *    MOV tmpY, tmpX;
    725     * and convert into:
    726     *    FOO tmpY, arg0, arg1;
    727     */
    728 
    729    for (i = 0; i < prog->arb.NumInstructions; i++) {
    730       const struct prog_instruction *mov = prog->arb.Instructions + i;
    731 
    732       switch (mov->Opcode) {
    733       case OPCODE_BGNLOOP:
    734       case OPCODE_BGNSUB:
    735       case OPCODE_IF:
    736          nesting++;
    737          break;
    738       case OPCODE_ENDLOOP:
    739       case OPCODE_ENDSUB:
    740       case OPCODE_ENDIF:
    741          nesting--;
    742          break;
    743       case OPCODE_MOV:
    744          if (i > 0 &&
    745              can_downward_mov_be_modifed(mov) &&
    746              mov->SrcReg[0].File == PROGRAM_TEMPORARY &&
    747              nesting == 0)
    748          {
    749 
    750             /* see if this MOV can be removed */
    751             const GLuint id = mov->SrcReg[0].Index;
    752             struct prog_instruction *prevInst;
    753             GLuint prevI;
    754 
    755             /* get pointer to previous instruction */
    756             prevI = i - 1;
    757             while (prevI > 0 && removeInst[prevI])
    758                prevI--;
    759             prevInst = prog->arb.Instructions + prevI;
    760 
    761             if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
    762                 prevInst->DstReg.Index == id &&
    763                 prevInst->DstReg.RelAddr == 0) {
    764 
    765                const GLuint dst_mask = prevInst->DstReg.WriteMask;
    766                enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
    767 
    768                if (next_use == WRITE || next_use == END) {
    769                   /* OK, we can safely remove this MOV instruction.
    770                    * Transform:
    771                    *   prevI: FOO tempIndex, x, y;
    772                    *       i: MOV z, tempIndex;
    773                    * Into:
    774                    *   prevI: FOO z, x, y;
    775                    */
    776                   if (_mesa_merge_mov_into_inst(prevInst, mov)) {
    777                      removeInst[i] = GL_TRUE;
    778                      if (dbg) {
    779                         printf("Remove MOV at %u\n", i);
    780                         printf("new prev inst %u: ", prevI);
    781                         _mesa_print_instruction(prevInst);
    782                      }
    783                   }
    784                }
    785             }
    786          }
    787          break;
    788       default:
    789          ; /* nothing */
    790       }
    791    }
    792 
    793    /* now remove the instructions which aren't needed */
    794    rem = remove_instructions(prog, removeInst, mem_ctx);
    795 
    796    free(removeInst);
    797 
    798    if (dbg) {
    799       printf("Optimize: End remove extra moves.  %u instructions removed\n", rem);
    800       /*_mesa_print_program(prog);*/
    801    }
    802 
    803    return rem != 0;
    804 }
    805 
    806 
/** A live register interval */
struct interval
{
   GLuint Reg;         /**< The temporary register index */
   GLuint Start, End;  /**< Start/end instruction numbers */
};
    813 
    814 
/** A list of register intervals */
struct interval_list
{
   GLuint Num;  /**< Number of entries of Intervals[] currently in use */
   /** Fixed-capacity storage; entries [0, Num) are valid */
   struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
};
    821 
    822 
    823 static void
    824 append_interval(struct interval_list *list, const struct interval *inv)
    825 {
    826    list->Intervals[list->Num++] = *inv;
    827 }
    828 
    829 
    830 /** Insert interval inv into list, sorted by interval end */
    831 static void
    832 insert_interval_by_end(struct interval_list *list, const struct interval *inv)
    833 {
    834    /* XXX we could do a binary search insertion here since list is sorted */
    835    GLint i = list->Num - 1;
    836    while (i >= 0 && list->Intervals[i].End > inv->End) {
    837       list->Intervals[i + 1] = list->Intervals[i];
    838       i--;
    839    }
    840    list->Intervals[i + 1] = *inv;
    841    list->Num++;
    842 
    843 #ifdef DEBUG
    844    {
    845       GLuint i;
    846       for (i = 0; i + 1 < list->Num; i++) {
    847          assert(list->Intervals[i].End <= list->Intervals[i + 1].End);
    848       }
    849    }
    850 #endif
    851 }
    852 
    853 
    854 /** Remove the given interval from the interval list */
    855 static void
    856 remove_interval(struct interval_list *list, const struct interval *inv)
    857 {
    858    /* XXX we could binary search since list is sorted */
    859    GLuint k;
    860    for (k = 0; k < list->Num; k++) {
    861       if (list->Intervals[k].Reg == inv->Reg) {
    862          /* found, remove it */
    863          assert(list->Intervals[k].Start == inv->Start);
    864          assert(list->Intervals[k].End == inv->End);
    865          while (k < list->Num - 1) {
    866             list->Intervals[k] = list->Intervals[k + 1];
    867             k++;
    868          }
    869          list->Num--;
    870          return;
    871       }
    872    }
    873 }
    874 
    875 
    876 /** called by qsort() */
    877 static int
    878 compare_start(const void *a, const void *b)
    879 {
    880    const struct interval *ia = (const struct interval *) a;
    881    const struct interval *ib = (const struct interval *) b;
    882    if (ia->Start < ib->Start)
    883       return -1;
    884    else if (ia->Start > ib->Start)
    885       return +1;
    886    else
    887       return 0;
    888 }
    889 
    890 
    891 /** sort the interval list according to interval starts */
    892 static void
    893 sort_interval_list_by_start(struct interval_list *list)
    894 {
    895    qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
    896 #ifdef DEBUG
    897    {
    898       GLuint i;
    899       for (i = 0; i + 1 < list->Num; i++) {
    900          assert(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
    901       }
    902    }
    903 #endif
    904 }
    905 
    906 struct loop_info
    907 {
    908    GLuint Start, End;  /**< Start, end instructions of loop */
    909 };
    910 
    911 /**
    912  * Update the intermediate interval info for register 'index' and
    913  * instruction 'ic'.
    914  */
    915 static void
    916 update_interval(GLint intBegin[], GLint intEnd[],
    917 		struct loop_info *loopStack, GLuint loopStackDepth,
    918 		GLuint index, GLuint ic)
    919 {
    920    unsigned i;
    921    GLuint begin = ic;
    922    GLuint end = ic;
    923 
    924    /* If the register is used in a loop, extend its lifetime through the end
    925     * of the outermost loop that doesn't contain its definition.
    926     */
    927    for (i = 0; i < loopStackDepth; i++) {
    928       if (intBegin[index] < loopStack[i].Start) {
    929 	 end = loopStack[i].End;
    930 	 break;
    931       }
    932    }
    933 
    934    /* Variables that are live at the end of a loop will also be live at the
    935     * beginning, so an instruction inside of a loop should have its live
    936     * interval begin at the start of the outermost loop.
    937     */
    938    if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) {
    939       begin = loopStack[0].Start;
    940    }
    941 
    942    assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
    943    if (intBegin[index] == -1) {
    944       assert(intEnd[index] == -1);
    945       intBegin[index] = begin;
    946       intEnd[index] = end;
    947    }
    948    else {
    949       intEnd[index] = end;
    950    }
    951 }
    952 
    953 
    954 /**
    955  * Find first/last instruction that references each temporary register.
    956  */
    957 GLboolean
    958 _mesa_find_temp_intervals(const struct prog_instruction *instructions,
    959                           GLuint numInstructions,
    960                           GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],
    961                           GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
    962 {
    963    struct loop_info loopStack[MAX_LOOP_NESTING];
    964    GLuint loopStackDepth = 0;
    965    GLuint i;
    966 
    967    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
    968       intBegin[i] = intEnd[i] = -1;
    969    }
    970 
    971    /* Scan instructions looking for temporary registers */
    972    for (i = 0; i < numInstructions; i++) {
    973       const struct prog_instruction *inst = instructions + i;
    974       if (inst->Opcode == OPCODE_BGNLOOP) {
    975          loopStack[loopStackDepth].Start = i;
    976          loopStack[loopStackDepth].End = inst->BranchTarget;
    977          loopStackDepth++;
    978       }
    979       else if (inst->Opcode == OPCODE_ENDLOOP) {
    980          loopStackDepth--;
    981       }
    982       else if (inst->Opcode == OPCODE_CAL) {
    983          return GL_FALSE;
    984       }
    985       else {
    986          const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
    987          GLuint j;
    988          for (j = 0; j < numSrc; j++) {
    989             if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
    990                const GLuint index = inst->SrcReg[j].Index;
    991                if (inst->SrcReg[j].RelAddr)
    992                   return GL_FALSE;
    993                update_interval(intBegin, intEnd, loopStack, loopStackDepth,
    994 			       index, i);
    995             }
    996          }
    997          if (inst->DstReg.File == PROGRAM_TEMPORARY) {
    998             const GLuint index = inst->DstReg.Index;
    999             if (inst->DstReg.RelAddr)
   1000                return GL_FALSE;
   1001             update_interval(intBegin, intEnd, loopStack, loopStackDepth,
   1002 			    index, i);
   1003          }
   1004       }
   1005    }
   1006 
   1007    return GL_TRUE;
   1008 }
   1009 
   1010 
   1011 /**
   1012  * Find the live intervals for each temporary register in the program.
   1013  * For register R, the interval [A,B] indicates that R is referenced
   1014  * from instruction A through instruction B.
   1015  * Special consideration is needed for loops and subroutines.
   1016  * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
   1017  */
   1018 static GLboolean
   1019 find_live_intervals(struct gl_program *prog,
   1020                     struct interval_list *liveIntervals)
   1021 {
   1022    GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
   1023    GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
   1024    GLuint i;
   1025 
   1026    /*
   1027     * Note: we'll return GL_FALSE below if we find relative indexing
   1028     * into the TEMP register file.  We can't handle that yet.
   1029     * We also give up on subroutines for now.
   1030     */
   1031 
   1032    if (dbg) {
   1033       printf("Optimize: Begin find intervals\n");
   1034    }
   1035 
   1036    /* build intermediate arrays */
   1037    if (!_mesa_find_temp_intervals(prog->arb.Instructions,
   1038                                   prog->arb.NumInstructions,
   1039                                   intBegin, intEnd))
   1040       return GL_FALSE;
   1041 
   1042    /* Build live intervals list from intermediate arrays */
   1043    liveIntervals->Num = 0;
   1044    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
   1045       if (intBegin[i] >= 0) {
   1046          struct interval inv;
   1047          inv.Reg = i;
   1048          inv.Start = intBegin[i];
   1049          inv.End = intEnd[i];
   1050          append_interval(liveIntervals, &inv);
   1051       }
   1052    }
   1053 
   1054    /* Sort the list according to interval starts */
   1055    sort_interval_list_by_start(liveIntervals);
   1056 
   1057    if (dbg) {
   1058       /* print interval info */
   1059       for (i = 0; i < liveIntervals->Num; i++) {
   1060          const struct interval *inv = liveIntervals->Intervals + i;
   1061          printf("Reg[%d] live [%d, %d]:",
   1062                       inv->Reg, inv->Start, inv->End);
   1063          if (1) {
   1064             GLuint j;
   1065             for (j = 0; j < inv->Start; j++)
   1066                printf(" ");
   1067             for (j = inv->Start; j <= inv->End; j++)
   1068                printf("x");
   1069          }
   1070          printf("\n");
   1071       }
   1072    }
   1073 
   1074    return GL_TRUE;
   1075 }
   1076 
   1077 
   1078 /** Scan the array of used register flags to find free entry */
   1079 static GLint
   1080 alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
   1081 {
   1082    GLuint k;
   1083    for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) {
   1084       if (!usedRegs[k]) {
   1085          usedRegs[k] = GL_TRUE;
   1086          return k;
   1087       }
   1088    }
   1089    return -1;
   1090 }
   1091 
   1092 
   1093 /**
   1094  * This function implements "Linear Scan Register Allocation" to reduce
   1095  * the number of temporary registers used by the program.
   1096  *
   1097  * We compute the "live interval" for all temporary registers then
   1098  * examine the overlap of the intervals to allocate new registers.
   1099  * Basically, if two intervals do not overlap, they can use the same register.
   1100  */
   1101 static void
   1102 _mesa_reallocate_registers(struct gl_program *prog)
   1103 {
   1104    struct interval_list liveIntervals;
   1105    GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
   1106    GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
   1107    GLuint i;
   1108    GLint maxTemp = -1;
   1109 
   1110    if (dbg) {
   1111       printf("Optimize: Begin live-interval register reallocation\n");
   1112       _mesa_print_program(prog);
   1113    }
   1114 
   1115    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
   1116       registerMap[i] = -1;
   1117       usedRegs[i] = GL_FALSE;
   1118    }
   1119 
   1120    if (!find_live_intervals(prog, &liveIntervals)) {
   1121       if (dbg)
   1122          printf("Aborting register reallocation\n");
   1123       return;
   1124    }
   1125 
   1126    {
   1127       struct interval_list activeIntervals;
   1128       activeIntervals.Num = 0;
   1129 
   1130       /* loop over live intervals, allocating a new register for each */
   1131       for (i = 0; i < liveIntervals.Num; i++) {
   1132          const struct interval *live = liveIntervals.Intervals + i;
   1133 
   1134          if (dbg)
   1135             printf("Consider register %u\n", live->Reg);
   1136 
   1137          /* Expire old intervals.  Intervals which have ended with respect
   1138           * to the live interval can have their remapped registers freed.
   1139           */
   1140          {
   1141             GLint j;
   1142             for (j = 0; j < (GLint) activeIntervals.Num; j++) {
   1143                const struct interval *inv = activeIntervals.Intervals + j;
   1144                if (inv->End >= live->Start) {
   1145                   /* Stop now.  Since the activeInterval list is sorted
   1146                    * we know we don't have to go further.
   1147                    */
   1148                   break;
   1149                }
   1150                else {
   1151                   /* Interval 'inv' has expired */
   1152                   const GLint regNew = registerMap[inv->Reg];
   1153                   assert(regNew >= 0);
   1154 
   1155                   if (dbg)
   1156                      printf("  expire interval for reg %u\n", inv->Reg);
   1157 
   1158                   /* remove interval j from active list */
   1159                   remove_interval(&activeIntervals, inv);
   1160                   j--;  /* counter-act j++ in for-loop above */
   1161 
   1162                   /* return register regNew to the free pool */
   1163                   if (dbg)
   1164                      printf("  free reg %d\n", regNew);
   1165                   assert(usedRegs[regNew] == GL_TRUE);
   1166                   usedRegs[regNew] = GL_FALSE;
   1167                }
   1168             }
   1169          }
   1170 
   1171          /* find a free register for this live interval */
   1172          {
   1173             const GLint k = alloc_register(usedRegs);
   1174             if (k < 0) {
   1175                /* out of registers, give up */
   1176                return;
   1177             }
   1178             registerMap[live->Reg] = k;
   1179             maxTemp = MAX2(maxTemp, k);
   1180             if (dbg)
   1181                printf("  remap register %u -> %d\n", live->Reg, k);
   1182          }
   1183 
   1184          /* Insert this live interval into the active list which is sorted
   1185           * by increasing end points.
   1186           */
   1187          insert_interval_by_end(&activeIntervals, live);
   1188       }
   1189    }
   1190 
   1191    if (maxTemp + 1 < (GLint) liveIntervals.Num) {
   1192       /* OK, we've reduced the number of registers needed.
   1193        * Scan the program and replace all the old temporary register
   1194        * indexes with the new indexes.
   1195        */
   1196       replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
   1197 
   1198       prog->arb.NumTemporaries = maxTemp + 1;
   1199    }
   1200 
   1201    if (dbg) {
   1202       printf("Optimize: End live-interval register reallocation\n");
   1203       printf("Num temp regs before: %u  after: %u\n",
   1204                    liveIntervals.Num, maxTemp + 1);
   1205       _mesa_print_program(prog);
   1206    }
   1207 }
   1208 
   1209 
   #if 0
   /**
    * Debug helper (compiled out): print a heading made of \p txt and the
    * instruction count to stderr, then print the program and its parameters.
    */
   static void
   print_it(struct gl_context *ctx, struct gl_program *program, const char *txt)
   {
      fprintf(stderr, "%s (%u inst):\n", txt, program->arb.NumInstructions);
      _mesa_print_program(program);
      _mesa_print_program_parameters(ctx, program);
      fprintf(stderr, "\n\n");
   }
   #endif
   1219 
   1220 /**
   1221  * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
   1222  * instruction is the first instruction to write to register T0.  The are
   1223  * several lowering passes done in GLSL IR (e.g. branches and
   1224  * relative addressing) that create a large number of conditional assignments
   1225  * that ir_to_mesa converts to CMP instructions like the one mentioned above.
   1226  *
   1227  * Here is why this conversion is safe:
   1228  * CMP T0, T1 T2 T0 can be expanded to:
   1229  * if (T1 < 0.0)
   1230  * 	MOV T0, T2;
   1231  * else
   1232  * 	MOV T0, T0;
   1233  *
   1234  * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
   1235  * as the original program.  If (T1 < 0.0) evaluates to false, executing
   1236  * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
   1237  * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
   1238  * because any instruction that was going to read from T0 after this was going
   1239  * to read a garbage value anyway.
   1240  */
   1241 static void
   1242 _mesa_simplify_cmp(struct gl_program * program)
   1243 {
   1244    GLuint tempWrites[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
   1245    GLuint outputWrites[MAX_PROGRAM_OUTPUTS];
   1246    GLuint i;
   1247 
   1248    if (dbg) {
   1249       printf("Optimize: Begin reads without writes\n");
   1250       _mesa_print_program(program);
   1251    }
   1252 
   1253    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
   1254       tempWrites[i] = 0;
   1255    }
   1256 
   1257    for (i = 0; i < MAX_PROGRAM_OUTPUTS; i++) {
   1258       outputWrites[i] = 0;
   1259    }
   1260 
   1261    for (i = 0; i < program->arb.NumInstructions; i++) {
   1262       struct prog_instruction *inst = program->arb.Instructions + i;
   1263       GLuint prevWriteMask;
   1264 
   1265       /* Give up if we encounter relative addressing or flow control. */
   1266       if (_mesa_is_flow_control_opcode(inst->Opcode) || inst->DstReg.RelAddr) {
   1267          return;
   1268       }
   1269 
   1270       if (inst->DstReg.File == PROGRAM_OUTPUT) {
   1271          assert(inst->DstReg.Index < MAX_PROGRAM_OUTPUTS);
   1272          prevWriteMask = outputWrites[inst->DstReg.Index];
   1273          outputWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
   1274       } else if (inst->DstReg.File == PROGRAM_TEMPORARY) {
   1275          assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
   1276          prevWriteMask = tempWrites[inst->DstReg.Index];
   1277          tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
   1278       } else {
   1279          /* No other register type can be a destination register. */
   1280          continue;
   1281       }
   1282 
   1283       /* For a CMP to be considered a conditional write, the destination
   1284        * register and source register two must be the same. */
   1285       if (inst->Opcode == OPCODE_CMP
   1286           && !(inst->DstReg.WriteMask & prevWriteMask)
   1287           && inst->SrcReg[2].File == inst->DstReg.File
   1288           && inst->SrcReg[2].Index == inst->DstReg.Index
   1289           && inst->DstReg.WriteMask == get_src_arg_mask(inst, 2, NO_MASK)) {
   1290 
   1291          inst->Opcode = OPCODE_MOV;
   1292          inst->SrcReg[0] = inst->SrcReg[1];
   1293 
   1294 	 /* Unused operands are expected to have the file set to
   1295 	  * PROGRAM_UNDEFINED.  This is how _mesa_init_instructions initializes
   1296 	  * all of the sources.
   1297 	  */
   1298 	 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
   1299 	 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
   1300 	 inst->SrcReg[2].File = PROGRAM_UNDEFINED;
   1301 	 inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
   1302       }
   1303    }
   1304    if (dbg) {
   1305       printf("Optimize: End reads without writes\n");
   1306       _mesa_print_program(program);
   1307    }
   1308 }
   1309 
   1310 /**
   1311  * Apply optimizations to the given program to eliminate unnecessary
   1312  * instructions, temp regs, etc.
   1313  */
   1314 void
   1315 _mesa_optimize_program(struct gl_program *program, void *mem_ctx)
   1316 {
   1317    GLboolean any_change;
   1318 
   1319    _mesa_simplify_cmp(program);
   1320    /* Stop when no modifications were output */
   1321    do {
   1322       any_change = GL_FALSE;
   1323       _mesa_remove_extra_move_use(program);
   1324       if (_mesa_remove_dead_code_global(program, mem_ctx))
   1325          any_change = GL_TRUE;
   1326       if (_mesa_remove_extra_moves(program, mem_ctx))
   1327          any_change = GL_TRUE;
   1328       if (_mesa_remove_dead_code_local(program, mem_ctx))
   1329          any_change = GL_TRUE;
   1330 
   1331       any_change = _mesa_constant_fold(program) || any_change;
   1332       _mesa_reallocate_registers(program);
   1333    } while (any_change);
   1334 }
   1335 
   1336