Home | History | Annotate | Download | only in program
      1 /*
      2  * Mesa 3-D graphics library
      3  * Version:  7.5
      4  *
      5  * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the "Software"),
      9  * to deal in the Software without restriction, including without limitation
     10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     11  * and/or sell copies of the Software, and to permit persons to whom the
     12  * Software is furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included
     15  * in all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
     21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     23  */
     24 
     25 
     26 
     27 #include "main/glheader.h"
     28 #include "main/context.h"
     29 #include "main/macros.h"
     30 #include "program.h"
     31 #include "prog_instruction.h"
     32 #include "prog_optimize.h"
     33 #include "prog_print.h"
     34 
     35 
     36 #define MAX_LOOP_NESTING 50
     37 /* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
     38  * register allocate many temporary values into that small number of
     39  * temps.  So allow large temporary indices coming into the register
     40  * allocator.
     41  */
     42 #define REG_ALLOCATE_MAX_PROGRAM_TEMPS	((1 << INST_INDEX_BITS) - 1)
     43 
     44 static GLboolean dbg = GL_FALSE;
     45 
     46 #define NO_MASK 0xf
     47 
     48 /**
     49  * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
     50  * are read from the given src in this instruction, We also provide
     51  * one optional masks which may mask other components in the dst
     52  * register
     53  */
     54 static GLuint
     55 get_src_arg_mask(const struct prog_instruction *inst,
     56                  GLuint arg, GLuint dst_mask)
     57 {
     58    GLuint read_mask, channel_mask;
     59    GLuint comp;
     60 
     61    ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode));
     62 
     63    /* Form the dst register, find the written channels */
     64    if (inst->CondUpdate) {
     65       channel_mask = WRITEMASK_XYZW;
     66    }
     67    else {
     68       switch (inst->Opcode) {
     69       case OPCODE_MOV:
     70       case OPCODE_MIN:
     71       case OPCODE_MAX:
     72       case OPCODE_ABS:
     73       case OPCODE_ADD:
     74       case OPCODE_MAD:
     75       case OPCODE_MUL:
     76       case OPCODE_SUB:
     77       case OPCODE_CMP:
     78       case OPCODE_FLR:
     79       case OPCODE_FRC:
     80       case OPCODE_LRP:
     81       case OPCODE_SEQ:
     82       case OPCODE_SGE:
     83       case OPCODE_SGT:
     84       case OPCODE_SLE:
     85       case OPCODE_SLT:
     86       case OPCODE_SNE:
     87       case OPCODE_SSG:
     88          channel_mask = inst->DstReg.WriteMask & dst_mask;
     89          break;
     90       case OPCODE_RCP:
     91       case OPCODE_SIN:
     92       case OPCODE_COS:
     93       case OPCODE_RSQ:
     94       case OPCODE_POW:
     95       case OPCODE_EX2:
     96       case OPCODE_LOG:
     97          channel_mask = WRITEMASK_X;
     98          break;
     99       case OPCODE_DP2:
    100          channel_mask = WRITEMASK_XY;
    101          break;
    102       case OPCODE_DP3:
    103       case OPCODE_XPD:
    104          channel_mask = WRITEMASK_XYZ;
    105          break;
    106       default:
    107          channel_mask = WRITEMASK_XYZW;
    108          break;
    109       }
    110    }
    111 
    112    /* Now, given the src swizzle and the written channels, find which
    113     * components are actually read
    114     */
    115    read_mask = 0x0;
    116    for (comp = 0; comp < 4; ++comp) {
    117       const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
    118       ASSERT(coord < 4);
    119       if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
    120          read_mask |= 1 << coord;
    121    }
    122 
    123    return read_mask;
    124 }
    125 
    126 
    127 /**
    128  * For a MOV instruction, compute a write mask when src register also has
    129  * a mask
    130  */
    131 static GLuint
    132 get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
    133 {
    134    const GLuint mask = mov->DstReg.WriteMask;
    135    GLuint comp;
    136    GLuint updated_mask = 0x0;
    137 
    138    ASSERT(mov->Opcode == OPCODE_MOV);
    139 
    140    for (comp = 0; comp < 4; ++comp) {
    141       GLuint src_comp;
    142       if ((mask & (1 << comp)) == 0)
    143          continue;
    144       src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
    145       if ((src_mask & (1 << src_comp)) == 0)
    146          continue;
    147       updated_mask |= 1 << comp;
    148    }
    149 
    150    return updated_mask;
    151 }
    152 
    153 
    154 /**
    155  * Ensure that the swizzle is regular.  That is, all of the swizzle
    156  * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
    157  */
    158 static GLboolean
    159 is_swizzle_regular(GLuint swz)
    160 {
    161    return GET_SWZ(swz,0) <= SWIZZLE_W &&
    162           GET_SWZ(swz,1) <= SWIZZLE_W &&
    163           GET_SWZ(swz,2) <= SWIZZLE_W &&
    164           GET_SWZ(swz,3) <= SWIZZLE_W;
    165 }
    166 
    167 
    168 /**
    169  * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
    170  * \return number of instructions removed
    171  */
    172 static GLuint
    173 remove_instructions(struct gl_program *prog, const GLboolean *removeFlags)
    174 {
    175    GLint i, removeEnd = 0, removeCount = 0;
    176    GLuint totalRemoved = 0;
    177 
    178    /* go backward */
    179    for (i = prog->NumInstructions - 1; i >= 0; i--) {
    180       if (removeFlags[i]) {
    181          totalRemoved++;
    182          if (removeCount == 0) {
    183             /* begin a run of instructions to remove */
    184             removeEnd = i;
    185             removeCount = 1;
    186          }
    187          else {
    188             /* extend the run of instructions to remove */
    189             removeCount++;
    190          }
    191       }
    192       else {
    193          /* don't remove this instruction, but check if the preceeding
    194           * instructions are to be removed.
    195           */
    196          if (removeCount > 0) {
    197             GLint removeStart = removeEnd - removeCount + 1;
    198             _mesa_delete_instructions(prog, removeStart, removeCount);
    199             removeStart = removeCount = 0; /* reset removal info */
    200          }
    201       }
    202    }
    203    /* Finish removing if the first instruction was to be removed. */
    204    if (removeCount > 0) {
    205       GLint removeStart = removeEnd - removeCount + 1;
    206       _mesa_delete_instructions(prog, removeStart, removeCount);
    207    }
    208    return totalRemoved;
    209 }
    210 
    211 
    212 /**
    213  * Remap register indexes according to map.
    214  * \param prog  the program to search/replace
    215  * \param file  the type of register file to search/replace
    216  * \param map  maps old register indexes to new indexes
    217  */
    218 static void
    219 replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
    220 {
    221    GLuint i;
    222 
    223    for (i = 0; i < prog->NumInstructions; i++) {
    224       struct prog_instruction *inst = prog->Instructions + i;
    225       const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
    226       GLuint j;
    227       for (j = 0; j < numSrc; j++) {
    228          if (inst->SrcReg[j].File == file) {
    229             GLuint index = inst->SrcReg[j].Index;
    230             ASSERT(map[index] >= 0);
    231             inst->SrcReg[j].Index = map[index];
    232          }
    233       }
    234       if (inst->DstReg.File == file) {
    235          const GLuint index = inst->DstReg.Index;
    236          ASSERT(map[index] >= 0);
    237          inst->DstReg.Index = map[index];
    238       }
    239    }
    240 }
    241 
    242 
    243 /**
    244  * Remove dead instructions from the given program.
    245  * This is very primitive for now.  Basically look for temp registers
    246  * that are written to but never read.  Remove any instructions that
    247  * write to such registers.  Be careful with condition code setters.
    248  */
    249 static GLboolean
    250 _mesa_remove_dead_code_global(struct gl_program *prog)
    251 {
    252    GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
    253    GLboolean *removeInst; /* per-instruction removal flag */
    254    GLuint i, rem = 0, comp;
    255 
    256    memset(tempRead, 0, sizeof(tempRead));
    257 
    258    if (dbg) {
    259       printf("Optimize: Begin dead code removal\n");
    260       /*_mesa_print_program(prog);*/
    261    }
    262 
    263    removeInst = (GLboolean *)
    264       calloc(1, prog->NumInstructions * sizeof(GLboolean));
    265 
    266    /* Determine which temps are read and written */
    267    for (i = 0; i < prog->NumInstructions; i++) {
    268       const struct prog_instruction *inst = prog->Instructions + i;
    269       const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
    270       GLuint j;
    271 
    272       /* check src regs */
    273       for (j = 0; j < numSrc; j++) {
    274          if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
    275             const GLuint index = inst->SrcReg[j].Index;
    276             GLuint read_mask;
    277             ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
    278 	    read_mask = get_src_arg_mask(inst, j, NO_MASK);
    279 
    280             if (inst->SrcReg[j].RelAddr) {
    281                if (dbg)
    282                   printf("abort remove dead code (indirect temp)\n");
    283                goto done;
    284             }
    285 
    286 	    for (comp = 0; comp < 4; comp++) {
    287 	       const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
    288 	       ASSERT(swz < 4);
    289                if ((read_mask & (1 << swz)) == 0)
    290 		  continue;
    291                if (swz <= SWIZZLE_W)
    292                   tempRead[index][swz] = GL_TRUE;
    293 	    }
    294          }
    295       }
    296 
    297       /* check dst reg */
    298       if (inst->DstReg.File == PROGRAM_TEMPORARY) {
    299          const GLuint index = inst->DstReg.Index;
    300          ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
    301 
    302          if (inst->DstReg.RelAddr) {
    303             if (dbg)
    304                printf("abort remove dead code (indirect temp)\n");
    305             goto done;
    306          }
    307 
    308          if (inst->CondUpdate) {
    309             /* If we're writing to this register and setting condition
    310              * codes we cannot remove the instruction.  Prevent removal
    311              * by setting the 'read' flag.
    312              */
    313             tempRead[index][0] = GL_TRUE;
    314             tempRead[index][1] = GL_TRUE;
    315             tempRead[index][2] = GL_TRUE;
    316             tempRead[index][3] = GL_TRUE;
    317          }
    318       }
    319    }
    320 
    321    /* find instructions that write to dead registers, flag for removal */
    322    for (i = 0; i < prog->NumInstructions; i++) {
    323       struct prog_instruction *inst = prog->Instructions + i;
    324       const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
    325 
    326       if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
    327          GLint chan, index = inst->DstReg.Index;
    328 
    329 	 for (chan = 0; chan < 4; chan++) {
    330 	    if (!tempRead[index][chan] &&
    331 		inst->DstReg.WriteMask & (1 << chan)) {
    332 	       if (dbg) {
    333 		  printf("Remove writemask on %u.%c\n", i,
    334 			       chan == 3 ? 'w' : 'x' + chan);
    335 	       }
    336 	       inst->DstReg.WriteMask &= ~(1 << chan);
    337 	       rem++;
    338 	    }
    339 	 }
    340 
    341 	 if (inst->DstReg.WriteMask == 0) {
    342 	    /* If we cleared all writes, the instruction can be removed. */
    343 	    if (dbg)
    344 	       printf("Remove instruction %u: \n", i);
    345 	    removeInst[i] = GL_TRUE;
    346 	 }
    347       }
    348    }
    349 
    350    /* now remove the instructions which aren't needed */
    351    rem = remove_instructions(prog, removeInst);
    352 
    353    if (dbg) {
    354       printf("Optimize: End dead code removal.\n");
    355       printf("  %u channel writes removed\n", rem);
    356       printf("  %u instructions removed\n", rem);
    357       /*_mesa_print_program(prog);*/
    358    }
    359 
    360 done:
    361    free(removeInst);
    362    return rem != 0;
    363 }
    364 
    365 
    366 enum inst_use
    367 {
    368    READ,
    369    WRITE,
    370    FLOW,
    371    END
    372 };
    373 
    374 
    375 /**
    376  * Scan forward in program from 'start' for the next occurances of TEMP[index].
    377  * We look if an instruction reads the component given by the masks and if they
    378  * are overwritten.
    379  * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
    380  * that we can't look further.
    381  */
    382 static enum inst_use
    383 find_next_use(const struct gl_program *prog,
    384               GLuint start,
    385               GLuint index,
    386               GLuint mask)
    387 {
    388    GLuint i;
    389 
    390    for (i = start; i < prog->NumInstructions; i++) {
    391       const struct prog_instruction *inst = prog->Instructions + i;
    392       switch (inst->Opcode) {
    393       case OPCODE_BGNLOOP:
    394       case OPCODE_BGNSUB:
    395       case OPCODE_BRA:
    396       case OPCODE_CAL:
    397       case OPCODE_CONT:
    398       case OPCODE_IF:
    399       case OPCODE_ELSE:
    400       case OPCODE_ENDIF:
    401       case OPCODE_ENDLOOP:
    402       case OPCODE_ENDSUB:
    403       case OPCODE_RET:
    404          return FLOW;
    405       case OPCODE_END:
    406          return END;
    407       default:
    408          {
    409             const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
    410             GLuint j;
    411             for (j = 0; j < numSrc; j++) {
    412                if (inst->SrcReg[j].RelAddr ||
    413                    (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
    414                    inst->SrcReg[j].Index == index &&
    415                    (get_src_arg_mask(inst,j,NO_MASK) & mask)))
    416                   return READ;
    417             }
    418             if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
    419                 inst->DstReg.File == PROGRAM_TEMPORARY &&
    420                 inst->DstReg.Index == index) {
    421                mask &= ~inst->DstReg.WriteMask;
    422                if (mask == 0)
    423                   return WRITE;
    424             }
    425          }
    426       }
    427    }
    428    return END;
    429 }
    430 
    431 
    432 /**
    433  * Is the given instruction opcode a flow-control opcode?
    434  * XXX maybe move this into prog_instruction.[ch]
    435  */
    436 static GLboolean
    437 _mesa_is_flow_control_opcode(enum prog_opcode opcode)
    438 {
    439    switch (opcode) {
    440    case OPCODE_BGNLOOP:
    441    case OPCODE_BGNSUB:
    442    case OPCODE_BRA:
    443    case OPCODE_CAL:
    444    case OPCODE_CONT:
    445    case OPCODE_IF:
    446    case OPCODE_ELSE:
    447    case OPCODE_END:
    448    case OPCODE_ENDIF:
    449    case OPCODE_ENDLOOP:
    450    case OPCODE_ENDSUB:
    451    case OPCODE_RET:
    452       return GL_TRUE;
    453    default:
    454       return GL_FALSE;
    455    }
    456 }
    457 
    458 
    459 /**
    460  * Test if the given instruction is a simple MOV (no conditional updating,
    461  * not relative addressing, no negation/abs, etc).
    462  */
    463 static GLboolean
    464 can_downward_mov_be_modifed(const struct prog_instruction *mov)
    465 {
    466    return
    467       mov->Opcode == OPCODE_MOV &&
    468       mov->CondUpdate == GL_FALSE &&
    469       mov->SrcReg[0].RelAddr == 0 &&
    470       mov->SrcReg[0].Negate == 0 &&
    471       mov->SrcReg[0].Abs == 0 &&
    472       mov->SrcReg[0].HasIndex2 == 0 &&
    473       mov->SrcReg[0].RelAddr2 == 0 &&
    474       mov->DstReg.RelAddr == 0 &&
    475       mov->DstReg.CondMask == COND_TR;
    476 }
    477 
    478 
    479 static GLboolean
    480 can_upward_mov_be_modifed(const struct prog_instruction *mov)
    481 {
    482    return
    483       can_downward_mov_be_modifed(mov) &&
    484       mov->DstReg.File == PROGRAM_TEMPORARY &&
    485       mov->SaturateMode == SATURATE_OFF;
    486 }
    487 
    488 
    489 /**
    490  * Try to remove use of extraneous MOV instructions, to free them up for dead
    491  * code removal.
    492  */
    493 static void
    494 _mesa_remove_extra_move_use(struct gl_program *prog)
    495 {
    496    GLuint i, j;
    497 
    498    if (dbg) {
    499       printf("Optimize: Begin remove extra move use\n");
    500       _mesa_print_program(prog);
    501    }
    502 
    503    /*
    504     * Look for sequences such as this:
    505     *    MOV tmpX, arg0;
    506     *    ...
    507     *    FOO tmpY, tmpX, arg1;
    508     * and convert into:
    509     *    MOV tmpX, arg0;
    510     *    ...
    511     *    FOO tmpY, arg0, arg1;
    512     */
    513 
    514    for (i = 0; i + 1 < prog->NumInstructions; i++) {
    515       const struct prog_instruction *mov = prog->Instructions + i;
    516       GLuint dst_mask, src_mask;
    517       if (can_upward_mov_be_modifed(mov) == GL_FALSE)
    518          continue;
    519 
    520       /* Scanning the code, we maintain the components which are still active in
    521        * these two masks
    522        */
    523       dst_mask = mov->DstReg.WriteMask;
    524       src_mask = get_src_arg_mask(mov, 0, NO_MASK);
    525 
    526       /* Walk through remaining instructions until the or src reg gets
    527        * rewritten or we get into some flow-control, eliminating the use of
    528        * this MOV.
    529        */
    530       for (j = i + 1; j < prog->NumInstructions; j++) {
    531 	 struct prog_instruction *inst2 = prog->Instructions + j;
    532          GLuint arg;
    533 
    534 	 if (_mesa_is_flow_control_opcode(inst2->Opcode))
    535 	     break;
    536 
    537 	 /* First rewrite this instruction's args if appropriate. */
    538 	 for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
    539 	    GLuint comp, read_mask;
    540 
    541 	    if (inst2->SrcReg[arg].File != mov->DstReg.File ||
    542 		inst2->SrcReg[arg].Index != mov->DstReg.Index ||
    543 		inst2->SrcReg[arg].RelAddr ||
    544 		inst2->SrcReg[arg].Abs)
    545 	       continue;
    546             read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
    547 
    548 	    /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
    549              * components read still come from the mov instructions
    550              */
    551             if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
    552                (read_mask & dst_mask) == read_mask) {
    553                for (comp = 0; comp < 4; comp++) {
    554                   const GLuint inst2_swz =
    555                      GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
    556                   const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
    557                   inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
    558                   inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
    559                   inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
    560                                                   inst2_swz) & 0x1) << comp);
    561                }
    562                inst2->SrcReg[arg].File = mov->SrcReg[0].File;
    563                inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
    564             }
    565 	 }
    566 
    567 	 /* The source of MOV is written. This potentially deactivates some
    568           * components from the src and dst of the MOV instruction
    569           */
    570 	 if (inst2->DstReg.File == mov->DstReg.File &&
    571 	     (inst2->DstReg.RelAddr ||
    572 	      inst2->DstReg.Index == mov->DstReg.Index)) {
    573             dst_mask &= ~inst2->DstReg.WriteMask;
    574             src_mask = get_src_arg_mask(mov, 0, dst_mask);
    575          }
    576 
    577          /* Idem when the destination of mov is written */
    578 	 if (inst2->DstReg.File == mov->SrcReg[0].File &&
    579 	     (inst2->DstReg.RelAddr ||
    580 	      inst2->DstReg.Index == mov->SrcReg[0].Index)) {
    581             src_mask &= ~inst2->DstReg.WriteMask;
    582             dst_mask &= get_dst_mask_for_mov(mov, src_mask);
    583          }
    584          if (dst_mask == 0)
    585             break;
    586       }
    587    }
    588 
    589    if (dbg) {
    590       printf("Optimize: End remove extra move use.\n");
    591       /*_mesa_print_program(prog);*/
    592    }
    593 }
    594 
    595 
    596 /**
    597  * Complements dead_code_global. Try to remove code in block of code by
    598  * carefully monitoring the swizzles. Both functions should be merged into one
    599  * with a proper control flow graph
    600  */
    601 static GLboolean
    602 _mesa_remove_dead_code_local(struct gl_program *prog)
    603 {
    604    GLboolean *removeInst;
    605    GLuint i, arg, rem = 0;
    606 
    607    removeInst = (GLboolean *)
    608       calloc(1, prog->NumInstructions * sizeof(GLboolean));
    609 
    610    for (i = 0; i < prog->NumInstructions; i++) {
    611       const struct prog_instruction *inst = prog->Instructions + i;
    612       const GLuint index = inst->DstReg.Index;
    613       const GLuint mask = inst->DstReg.WriteMask;
    614       enum inst_use use;
    615 
    616       /* We must deactivate the pass as soon as some indirection is used */
    617       if (inst->DstReg.RelAddr)
    618          goto done;
    619       for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
    620          if (inst->SrcReg[arg].RelAddr)
    621             goto done;
    622 
    623       if (_mesa_is_flow_control_opcode(inst->Opcode) ||
    624           _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
    625           inst->DstReg.File != PROGRAM_TEMPORARY ||
    626           inst->DstReg.RelAddr)
    627          continue;
    628 
    629       use = find_next_use(prog, i+1, index, mask);
    630       if (use == WRITE || use == END)
    631          removeInst[i] = GL_TRUE;
    632    }
    633 
    634    rem = remove_instructions(prog, removeInst);
    635 
    636 done:
    637    free(removeInst);
    638    return rem != 0;
    639 }
    640 
    641 
    642 /**
    643  * Try to inject the destination of mov as the destination of inst and recompute
    644  * the swizzles operators for the sources of inst if required. Return GL_TRUE
    645  * of the substitution was possible, GL_FALSE otherwise
    646  */
    647 static GLboolean
    648 _mesa_merge_mov_into_inst(struct prog_instruction *inst,
    649                           const struct prog_instruction *mov)
    650 {
    651    /* Indirection table which associates destination and source components for
    652     * the mov instruction
    653     */
    654    const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
    655 
    656    /* Some components are not written by inst. We cannot remove the mov */
    657    if (mask != (inst->DstReg.WriteMask & mask))
    658       return GL_FALSE;
    659 
    660    inst->SaturateMode |= mov->SaturateMode;
    661 
    662    /* Depending on the instruction, we may need to recompute the swizzles.
    663     * Also, some other instructions (like TEX) are not linear. We will only
    664     * consider completely active sources and destinations
    665     */
    666    switch (inst->Opcode) {
    667 
    668    /* Carstesian instructions: we compute the swizzle */
    669    case OPCODE_MOV:
    670    case OPCODE_MIN:
    671    case OPCODE_MAX:
    672    case OPCODE_ABS:
    673    case OPCODE_ADD:
    674    case OPCODE_MAD:
    675    case OPCODE_MUL:
    676    case OPCODE_SUB:
    677    {
    678       GLuint dst_to_src_comp[4] = {0,0,0,0};
    679       GLuint dst_comp, arg;
    680       for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
    681          if (mov->DstReg.WriteMask & (1 << dst_comp)) {
    682             const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
    683             ASSERT(src_comp < 4);
    684             dst_to_src_comp[dst_comp] = src_comp;
    685          }
    686       }
    687 
    688       /* Patch each source of the instruction */
    689       for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
    690          const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
    691          inst->SrcReg[arg].Swizzle = 0;
    692 
    693          /* Reset each active component of the swizzle */
    694          for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
    695             GLuint src_comp, arg_comp;
    696             if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
    697                continue;
    698             src_comp = dst_to_src_comp[dst_comp];
    699             ASSERT(src_comp < 4);
    700             arg_comp = GET_SWZ(arg_swz, src_comp);
    701             ASSERT(arg_comp < 4);
    702             inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
    703          }
    704       }
    705       inst->DstReg = mov->DstReg;
    706       return GL_TRUE;
    707    }
    708 
    709    /* Dot products and scalar instructions: we only change the destination */
    710    case OPCODE_RCP:
    711    case OPCODE_SIN:
    712    case OPCODE_COS:
    713    case OPCODE_RSQ:
    714    case OPCODE_POW:
    715    case OPCODE_EX2:
    716    case OPCODE_LOG:
    717    case OPCODE_DP2:
    718    case OPCODE_DP3:
    719    case OPCODE_DP4:
    720       inst->DstReg = mov->DstReg;
    721       return GL_TRUE;
    722 
    723    /* All other instructions require fully active components with no swizzle */
    724    default:
    725       if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
    726           inst->DstReg.WriteMask != WRITEMASK_XYZW)
    727          return GL_FALSE;
    728       inst->DstReg = mov->DstReg;
    729       return GL_TRUE;
    730    }
    731 }
    732 
    733 
    734 /**
    735  * Try to remove extraneous MOV instructions from the given program.
    736  */
    737 static GLboolean
    738 _mesa_remove_extra_moves(struct gl_program *prog)
    739 {
    740    GLboolean *removeInst; /* per-instruction removal flag */
    741    GLuint i, rem = 0, nesting = 0;
    742 
    743    if (dbg) {
    744       printf("Optimize: Begin remove extra moves\n");
    745       _mesa_print_program(prog);
    746    }
    747 
    748    removeInst = (GLboolean *)
    749       calloc(1, prog->NumInstructions * sizeof(GLboolean));
    750 
    751    /*
    752     * Look for sequences such as this:
    753     *    FOO tmpX, arg0, arg1;
    754     *    MOV tmpY, tmpX;
    755     * and convert into:
    756     *    FOO tmpY, arg0, arg1;
    757     */
    758 
    759    for (i = 0; i < prog->NumInstructions; i++) {
    760       const struct prog_instruction *mov = prog->Instructions + i;
    761 
    762       switch (mov->Opcode) {
    763       case OPCODE_BGNLOOP:
    764       case OPCODE_BGNSUB:
    765       case OPCODE_IF:
    766          nesting++;
    767          break;
    768       case OPCODE_ENDLOOP:
    769       case OPCODE_ENDSUB:
    770       case OPCODE_ENDIF:
    771          nesting--;
    772          break;
    773       case OPCODE_MOV:
    774          if (i > 0 &&
    775              can_downward_mov_be_modifed(mov) &&
    776              mov->SrcReg[0].File == PROGRAM_TEMPORARY &&
    777              nesting == 0)
    778          {
    779 
    780             /* see if this MOV can be removed */
    781             const GLuint id = mov->SrcReg[0].Index;
    782             struct prog_instruction *prevInst;
    783             GLuint prevI;
    784 
    785             /* get pointer to previous instruction */
    786             prevI = i - 1;
    787             while (prevI > 0 && removeInst[prevI])
    788                prevI--;
    789             prevInst = prog->Instructions + prevI;
    790 
    791             if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
    792                 prevInst->DstReg.Index == id &&
    793                 prevInst->DstReg.RelAddr == 0 &&
    794                 prevInst->DstReg.CondSrc == 0 &&
    795                 prevInst->DstReg.CondMask == COND_TR) {
    796 
    797                const GLuint dst_mask = prevInst->DstReg.WriteMask;
    798                enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
    799 
    800                if (next_use == WRITE || next_use == END) {
    801                   /* OK, we can safely remove this MOV instruction.
    802                    * Transform:
    803                    *   prevI: FOO tempIndex, x, y;
    804                    *       i: MOV z, tempIndex;
    805                    * Into:
    806                    *   prevI: FOO z, x, y;
    807                    */
    808                   if (_mesa_merge_mov_into_inst(prevInst, mov)) {
    809                      removeInst[i] = GL_TRUE;
    810                      if (dbg) {
    811                         printf("Remove MOV at %u\n", i);
    812                         printf("new prev inst %u: ", prevI);
    813                         _mesa_print_instruction(prevInst);
    814                      }
    815                   }
    816                }
    817             }
    818          }
    819          break;
    820       default:
    821          ; /* nothing */
    822       }
    823    }
    824 
    825    /* now remove the instructions which aren't needed */
    826    rem = remove_instructions(prog, removeInst);
    827 
    828    free(removeInst);
    829 
    830    if (dbg) {
    831       printf("Optimize: End remove extra moves.  %u instructions removed\n", rem);
    832       /*_mesa_print_program(prog);*/
    833    }
    834 
    835    return rem != 0;
    836 }
    837 
    838 
    839 /** A live register interval */
    840 struct interval
    841 {
    842    GLuint Reg;         /** The temporary register index */
    843    GLuint Start, End;  /** Start/end instruction numbers */
    844 };
    845 
    846 
    847 /** A list of register intervals */
    848 struct interval_list
    849 {
    850    GLuint Num;
    851    struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
    852 };
    853 
    854 
    855 static void
    856 append_interval(struct interval_list *list, const struct interval *inv)
    857 {
    858    list->Intervals[list->Num++] = *inv;
    859 }
    860 
    861 
    862 /** Insert interval inv into list, sorted by interval end */
    863 static void
    864 insert_interval_by_end(struct interval_list *list, const struct interval *inv)
    865 {
    866    /* XXX we could do a binary search insertion here since list is sorted */
    867    GLint i = list->Num - 1;
    868    while (i >= 0 && list->Intervals[i].End > inv->End) {
    869       list->Intervals[i + 1] = list->Intervals[i];
    870       i--;
    871    }
    872    list->Intervals[i + 1] = *inv;
    873    list->Num++;
    874 
    875 #ifdef DEBUG
    876    {
    877       GLuint i;
    878       for (i = 0; i + 1 < list->Num; i++) {
    879          ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End);
    880       }
    881    }
    882 #endif
    883 }
    884 
    885 
    886 /** Remove the given interval from the interval list */
    887 static void
    888 remove_interval(struct interval_list *list, const struct interval *inv)
    889 {
    890    /* XXX we could binary search since list is sorted */
    891    GLuint k;
    892    for (k = 0; k < list->Num; k++) {
    893       if (list->Intervals[k].Reg == inv->Reg) {
    894          /* found, remove it */
    895          ASSERT(list->Intervals[k].Start == inv->Start);
    896          ASSERT(list->Intervals[k].End == inv->End);
    897          while (k < list->Num - 1) {
    898             list->Intervals[k] = list->Intervals[k + 1];
    899             k++;
    900          }
    901          list->Num--;
    902          return;
    903       }
    904    }
    905 }
    906 
    907 
    908 /** called by qsort() */
    909 static int
    910 compare_start(const void *a, const void *b)
    911 {
    912    const struct interval *ia = (const struct interval *) a;
    913    const struct interval *ib = (const struct interval *) b;
    914    if (ia->Start < ib->Start)
    915       return -1;
    916    else if (ia->Start > ib->Start)
    917       return +1;
    918    else
    919       return 0;
    920 }
    921 
    922 
    923 /** sort the interval list according to interval starts */
    924 static void
    925 sort_interval_list_by_start(struct interval_list *list)
    926 {
    927    qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
    928 #ifdef DEBUG
    929    {
    930       GLuint i;
    931       for (i = 0; i + 1 < list->Num; i++) {
    932          ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
    933       }
    934    }
    935 #endif
    936 }
    937 
    938 struct loop_info
    939 {
    940    GLuint Start, End;  /**< Start, end instructions of loop */
    941 };
    942 
    943 /**
    944  * Update the intermediate interval info for register 'index' and
    945  * instruction 'ic'.
    946  */
    947 static void
    948 update_interval(GLint intBegin[], GLint intEnd[],
    949 		struct loop_info *loopStack, GLuint loopStackDepth,
    950 		GLuint index, GLuint ic)
    951 {
    952    int i;
    953    GLuint begin = ic;
    954    GLuint end = ic;
    955 
    956    /* If the register is used in a loop, extend its lifetime through the end
    957     * of the outermost loop that doesn't contain its definition.
    958     */
    959    for (i = 0; i < loopStackDepth; i++) {
    960       if (intBegin[index] < loopStack[i].Start) {
    961 	 end = loopStack[i].End;
    962 	 break;
    963       }
    964    }
    965 
    966    /* Variables that are live at the end of a loop will also be live at the
    967     * beginning, so an instruction inside of a loop should have its live
    968     * interval begin at the start of the outermost loop.
    969     */
    970    if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) {
    971       begin = loopStack[0].Start;
    972    }
    973 
    974    ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
    975    if (intBegin[index] == -1) {
    976       ASSERT(intEnd[index] == -1);
    977       intBegin[index] = begin;
    978       intEnd[index] = end;
    979    }
    980    else {
    981       intEnd[index] = end;
    982    }
    983 }
    984 
    985 
    986 /**
    987  * Find first/last instruction that references each temporary register.
    988  */
    989 GLboolean
    990 _mesa_find_temp_intervals(const struct prog_instruction *instructions,
    991                           GLuint numInstructions,
    992                           GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],
    993                           GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
    994 {
    995    struct loop_info loopStack[MAX_LOOP_NESTING];
    996    GLuint loopStackDepth = 0;
    997    GLuint i;
    998 
    999    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
   1000       intBegin[i] = intEnd[i] = -1;
   1001    }
   1002 
   1003    /* Scan instructions looking for temporary registers */
   1004    for (i = 0; i < numInstructions; i++) {
   1005       const struct prog_instruction *inst = instructions + i;
   1006       if (inst->Opcode == OPCODE_BGNLOOP) {
   1007          loopStack[loopStackDepth].Start = i;
   1008          loopStack[loopStackDepth].End = inst->BranchTarget;
   1009          loopStackDepth++;
   1010       }
   1011       else if (inst->Opcode == OPCODE_ENDLOOP) {
   1012          loopStackDepth--;
   1013       }
   1014       else if (inst->Opcode == OPCODE_CAL) {
   1015          return GL_FALSE;
   1016       }
   1017       else {
   1018          const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
   1019          GLuint j;
   1020          for (j = 0; j < numSrc; j++) {
   1021             if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
   1022                const GLuint index = inst->SrcReg[j].Index;
   1023                if (inst->SrcReg[j].RelAddr)
   1024                   return GL_FALSE;
   1025                update_interval(intBegin, intEnd, loopStack, loopStackDepth,
   1026 			       index, i);
   1027             }
   1028          }
   1029          if (inst->DstReg.File == PROGRAM_TEMPORARY) {
   1030             const GLuint index = inst->DstReg.Index;
   1031             if (inst->DstReg.RelAddr)
   1032                return GL_FALSE;
   1033             update_interval(intBegin, intEnd, loopStack, loopStackDepth,
   1034 			    index, i);
   1035          }
   1036       }
   1037    }
   1038 
   1039    return GL_TRUE;
   1040 }
   1041 
   1042 
   1043 /**
   1044  * Find the live intervals for each temporary register in the program.
   1045  * For register R, the interval [A,B] indicates that R is referenced
   1046  * from instruction A through instruction B.
   1047  * Special consideration is needed for loops and subroutines.
   1048  * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
   1049  */
   1050 static GLboolean
   1051 find_live_intervals(struct gl_program *prog,
   1052                     struct interval_list *liveIntervals)
   1053 {
   1054    GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
   1055    GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
   1056    GLuint i;
   1057 
   1058    /*
   1059     * Note: we'll return GL_FALSE below if we find relative indexing
   1060     * into the TEMP register file.  We can't handle that yet.
   1061     * We also give up on subroutines for now.
   1062     */
   1063 
   1064    if (dbg) {
   1065       printf("Optimize: Begin find intervals\n");
   1066    }
   1067 
   1068    /* build intermediate arrays */
   1069    if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
   1070                                   intBegin, intEnd))
   1071       return GL_FALSE;
   1072 
   1073    /* Build live intervals list from intermediate arrays */
   1074    liveIntervals->Num = 0;
   1075    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
   1076       if (intBegin[i] >= 0) {
   1077          struct interval inv;
   1078          inv.Reg = i;
   1079          inv.Start = intBegin[i];
   1080          inv.End = intEnd[i];
   1081          append_interval(liveIntervals, &inv);
   1082       }
   1083    }
   1084 
   1085    /* Sort the list according to interval starts */
   1086    sort_interval_list_by_start(liveIntervals);
   1087 
   1088    if (dbg) {
   1089       /* print interval info */
   1090       for (i = 0; i < liveIntervals->Num; i++) {
   1091          const struct interval *inv = liveIntervals->Intervals + i;
   1092          printf("Reg[%d] live [%d, %d]:",
   1093                       inv->Reg, inv->Start, inv->End);
   1094          if (1) {
   1095             GLuint j;
   1096             for (j = 0; j < inv->Start; j++)
   1097                printf(" ");
   1098             for (j = inv->Start; j <= inv->End; j++)
   1099                printf("x");
   1100          }
   1101          printf("\n");
   1102       }
   1103    }
   1104 
   1105    return GL_TRUE;
   1106 }
   1107 
   1108 
   1109 /** Scan the array of used register flags to find free entry */
   1110 static GLint
   1111 alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
   1112 {
   1113    GLuint k;
   1114    for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) {
   1115       if (!usedRegs[k]) {
   1116          usedRegs[k] = GL_TRUE;
   1117          return k;
   1118       }
   1119    }
   1120    return -1;
   1121 }
   1122 
   1123 
   1124 /**
   1125  * This function implements "Linear Scan Register Allocation" to reduce
   1126  * the number of temporary registers used by the program.
   1127  *
   1128  * We compute the "live interval" for all temporary registers then
   1129  * examine the overlap of the intervals to allocate new registers.
   1130  * Basically, if two intervals do not overlap, they can use the same register.
   1131  */
   1132 static void
   1133 _mesa_reallocate_registers(struct gl_program *prog)
   1134 {
   1135    struct interval_list liveIntervals;
   1136    GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
   1137    GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
   1138    GLuint i;
   1139    GLint maxTemp = -1;
   1140 
   1141    if (dbg) {
   1142       printf("Optimize: Begin live-interval register reallocation\n");
   1143       _mesa_print_program(prog);
   1144    }
   1145 
   1146    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
   1147       registerMap[i] = -1;
   1148       usedRegs[i] = GL_FALSE;
   1149    }
   1150 
   1151    if (!find_live_intervals(prog, &liveIntervals)) {
   1152       if (dbg)
   1153          printf("Aborting register reallocation\n");
   1154       return;
   1155    }
   1156 
   1157    {
   1158       struct interval_list activeIntervals;
   1159       activeIntervals.Num = 0;
   1160 
   1161       /* loop over live intervals, allocating a new register for each */
   1162       for (i = 0; i < liveIntervals.Num; i++) {
   1163          const struct interval *live = liveIntervals.Intervals + i;
   1164 
   1165          if (dbg)
   1166             printf("Consider register %u\n", live->Reg);
   1167 
   1168          /* Expire old intervals.  Intervals which have ended with respect
   1169           * to the live interval can have their remapped registers freed.
   1170           */
   1171          {
   1172             GLint j;
   1173             for (j = 0; j < (GLint) activeIntervals.Num; j++) {
   1174                const struct interval *inv = activeIntervals.Intervals + j;
   1175                if (inv->End >= live->Start) {
   1176                   /* Stop now.  Since the activeInterval list is sorted
   1177                    * we know we don't have to go further.
   1178                    */
   1179                   break;
   1180                }
   1181                else {
   1182                   /* Interval 'inv' has expired */
   1183                   const GLint regNew = registerMap[inv->Reg];
   1184                   ASSERT(regNew >= 0);
   1185 
   1186                   if (dbg)
   1187                      printf("  expire interval for reg %u\n", inv->Reg);
   1188 
   1189                   /* remove interval j from active list */
   1190                   remove_interval(&activeIntervals, inv);
   1191                   j--;  /* counter-act j++ in for-loop above */
   1192 
   1193                   /* return register regNew to the free pool */
   1194                   if (dbg)
   1195                      printf("  free reg %d\n", regNew);
   1196                   ASSERT(usedRegs[regNew] == GL_TRUE);
   1197                   usedRegs[regNew] = GL_FALSE;
   1198                }
   1199             }
   1200          }
   1201 
   1202          /* find a free register for this live interval */
   1203          {
   1204             const GLint k = alloc_register(usedRegs);
   1205             if (k < 0) {
   1206                /* out of registers, give up */
   1207                return;
   1208             }
   1209             registerMap[live->Reg] = k;
   1210             maxTemp = MAX2(maxTemp, k);
   1211             if (dbg)
   1212                printf("  remap register %u -> %d\n", live->Reg, k);
   1213          }
   1214 
   1215          /* Insert this live interval into the active list which is sorted
   1216           * by increasing end points.
   1217           */
   1218          insert_interval_by_end(&activeIntervals, live);
   1219       }
   1220    }
   1221 
   1222    if (maxTemp + 1 < (GLint) liveIntervals.Num) {
   1223       /* OK, we've reduced the number of registers needed.
   1224        * Scan the program and replace all the old temporary register
   1225        * indexes with the new indexes.
   1226        */
   1227       replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
   1228 
   1229       prog->NumTemporaries = maxTemp + 1;
   1230    }
   1231 
   1232    if (dbg) {
   1233       printf("Optimize: End live-interval register reallocation\n");
   1234       printf("Num temp regs before: %u  after: %u\n",
   1235                    liveIntervals.Num, maxTemp + 1);
   1236       _mesa_print_program(prog);
   1237    }
   1238 }
   1239 
   1240 
   1241 #if 0
   1242 static void
   1243 print_it(struct gl_context *ctx, struct gl_program *program, const char *txt) {
   1244    fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions);
   1245    _mesa_print_program(program);
   1246    _mesa_print_program_parameters(ctx, program);
   1247    fprintf(stderr, "\n\n");
   1248 }
   1249 #endif
   1250 
   1251 /**
   1252  * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
   1253  * instruction is the first instruction to write to register T0.  The are
   1254  * several lowering passes done in GLSL IR (e.g. branches and
   1255  * relative addressing) that create a large number of conditional assignments
   1256  * that ir_to_mesa converts to CMP instructions like the one mentioned above.
   1257  *
   1258  * Here is why this conversion is safe:
   1259  * CMP T0, T1 T2 T0 can be expanded to:
   1260  * if (T1 < 0.0)
   1261  * 	MOV T0, T2;
   1262  * else
   1263  * 	MOV T0, T0;
   1264  *
   1265  * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
   1266  * as the original program.  If (T1 < 0.0) evaluates to false, executing
   1267  * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
   1268  * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
   1269  * because any instruction that was going to read from T0 after this was going
   1270  * to read a garbage value anyway.
   1271  */
   1272 static void
   1273 _mesa_simplify_cmp(struct gl_program * program)
   1274 {
   1275    GLuint tempWrites[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
   1276    GLuint outputWrites[MAX_PROGRAM_OUTPUTS];
   1277    GLuint i;
   1278 
   1279    if (dbg) {
   1280       printf("Optimize: Begin reads without writes\n");
   1281       _mesa_print_program(program);
   1282    }
   1283 
   1284    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
   1285       tempWrites[i] = 0;
   1286    }
   1287 
   1288    for (i = 0; i < MAX_PROGRAM_OUTPUTS; i++) {
   1289       outputWrites[i] = 0;
   1290    }
   1291 
   1292    for (i = 0; i < program->NumInstructions; i++) {
   1293       struct prog_instruction *inst = program->Instructions + i;
   1294       GLuint prevWriteMask;
   1295 
   1296       /* Give up if we encounter relative addressing or flow control. */
   1297       if (_mesa_is_flow_control_opcode(inst->Opcode) || inst->DstReg.RelAddr) {
   1298          return;
   1299       }
   1300 
   1301       if (inst->DstReg.File == PROGRAM_OUTPUT) {
   1302          assert(inst->DstReg.Index < MAX_PROGRAM_OUTPUTS);
   1303          prevWriteMask = outputWrites[inst->DstReg.Index];
   1304          outputWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
   1305       } else if (inst->DstReg.File == PROGRAM_TEMPORARY) {
   1306          assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
   1307          prevWriteMask = tempWrites[inst->DstReg.Index];
   1308          tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
   1309       } else {
   1310          /* No other register type can be a destination register. */
   1311          continue;
   1312       }
   1313 
   1314       /* For a CMP to be considered a conditional write, the destination
   1315        * register and source register two must be the same. */
   1316       if (inst->Opcode == OPCODE_CMP
   1317           && !(inst->DstReg.WriteMask & prevWriteMask)
   1318           && inst->SrcReg[2].File == inst->DstReg.File
   1319           && inst->SrcReg[2].Index == inst->DstReg.Index
   1320           && inst->DstReg.WriteMask == get_src_arg_mask(inst, 2, NO_MASK)) {
   1321 
   1322          inst->Opcode = OPCODE_MOV;
   1323          inst->SrcReg[0] = inst->SrcReg[1];
   1324 
   1325 	 /* Unused operands are expected to have the file set to
   1326 	  * PROGRAM_UNDEFINED.  This is how _mesa_init_instructions initializes
   1327 	  * all of the sources.
   1328 	  */
   1329 	 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
   1330 	 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
   1331 	 inst->SrcReg[2].File = PROGRAM_UNDEFINED;
   1332 	 inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
   1333       }
   1334    }
   1335    if (dbg) {
   1336       printf("Optimize: End reads without writes\n");
   1337       _mesa_print_program(program);
   1338    }
   1339 }
   1340 
   1341 /**
   1342  * Apply optimizations to the given program to eliminate unnecessary
   1343  * instructions, temp regs, etc.
   1344  */
   1345 void
   1346 _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program)
   1347 {
   1348    GLboolean any_change;
   1349 
   1350    _mesa_simplify_cmp(program);
   1351    /* Stop when no modifications were output */
   1352    do {
   1353       any_change = GL_FALSE;
   1354       _mesa_remove_extra_move_use(program);
   1355       if (_mesa_remove_dead_code_global(program))
   1356          any_change = GL_TRUE;
   1357       if (_mesa_remove_extra_moves(program))
   1358          any_change = GL_TRUE;
   1359       if (_mesa_remove_dead_code_local(program))
   1360          any_change = GL_TRUE;
   1361 
   1362       any_change = _mesa_constant_fold(program) || any_change;
   1363       _mesa_reallocate_registers(program);
   1364    } while (any_change);
   1365 }
   1366 
   1367