Home | History | Annotate | Download | only in i965
      1 /*
      2  * Copyright  2012 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include "brw_fs.h"
     25 #include "brw_fs_cfg.h"
     26 
     27 /** @file brw_fs_cse.cpp
     28  *
     29  * Support for local common subexpression elimination.
     30  *
     31  * See Muchnik's Advanced Compiler Design and Implementation, section
     32  * 13.1 (p378).
     33  */
     34 
     35 namespace {
     36 struct aeb_entry : public exec_node {
     37    /** The instruction that generates the expression value. */
     38    fs_inst *generator;
     39 
     40    /** The temporary where the value is stored. */
     41    fs_reg tmp;
     42 };
     43 }
     44 
     45 static bool
     46 is_expression(const fs_inst *const inst)
     47 {
     48    switch (inst->opcode) {
     49    case BRW_OPCODE_SEL:
     50    case BRW_OPCODE_NOT:
     51    case BRW_OPCODE_AND:
     52    case BRW_OPCODE_OR:
     53    case BRW_OPCODE_XOR:
     54    case BRW_OPCODE_SHR:
     55    case BRW_OPCODE_SHL:
     56    case BRW_OPCODE_RSR:
     57    case BRW_OPCODE_RSL:
     58    case BRW_OPCODE_ASR:
     59    case BRW_OPCODE_ADD:
     60    case BRW_OPCODE_MUL:
     61    case BRW_OPCODE_FRC:
     62    case BRW_OPCODE_RNDU:
     63    case BRW_OPCODE_RNDD:
     64    case BRW_OPCODE_RNDE:
     65    case BRW_OPCODE_RNDZ:
     66    case BRW_OPCODE_LINE:
     67    case BRW_OPCODE_PLN:
     68    case BRW_OPCODE_MAD:
     69    case FS_OPCODE_CINTERP:
     70    case FS_OPCODE_LINTERP:
     71       return true;
     72    default:
     73       return false;
     74    }
     75 }
     76 
     77 static bool
     78 operands_match(fs_reg *xs, fs_reg *ys)
     79 {
     80    return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]);
     81 }
     82 
     83 bool
     84 fs_visitor::opt_cse_local(fs_bblock *block, exec_list *aeb)
     85 {
     86    bool progress = false;
     87 
     88    void *mem_ctx = ralloc_context(this->mem_ctx);
     89 
     90    for (fs_inst *inst = block->start;
     91 	inst != block->end->next;
     92 	inst = (fs_inst *) inst->next) {
     93 
     94       /* Skip some cases. */
     95       if (is_expression(inst) && !inst->predicated && inst->mlen == 0 &&
     96           !inst->force_uncompressed && !inst->force_sechalf &&
     97           !inst->conditional_mod)
     98       {
     99 	 bool found = false;
    100 
    101 	 aeb_entry *entry;
    102 	 foreach_list(entry_node, aeb) {
    103 	    entry = (aeb_entry *) entry_node;
    104 
    105 	    /* Match current instruction's expression against those in AEB. */
    106 	    if (inst->opcode == entry->generator->opcode &&
    107 		inst->saturate == entry->generator->saturate &&
    108 		operands_match(entry->generator->src, inst->src)) {
    109 
    110 	       found = true;
    111 	       progress = true;
    112 	       break;
    113 	    }
    114 	 }
    115 
    116 	 if (!found) {
    117 	    /* Our first sighting of this expression.  Create an entry. */
    118 	    aeb_entry *entry = ralloc(mem_ctx, aeb_entry);
    119 	    entry->tmp = reg_undef;
    120 	    entry->generator = inst;
    121 	    aeb->push_tail(entry);
    122 	 } else {
    123 	    /* This is at least our second sighting of this expression.
    124 	     * If we don't have a temporary already, make one.
    125 	     */
    126 	    bool no_existing_temp = entry->tmp.file == BAD_FILE;
    127 	    if (no_existing_temp) {
    128 	       entry->tmp = fs_reg(this, glsl_type::float_type);
    129 	       entry->tmp.type = inst->dst.type;
    130 
    131 	       fs_inst *copy = new(ralloc_parent(inst))
    132 		  fs_inst(BRW_OPCODE_MOV, entry->generator->dst, entry->tmp);
    133 	       entry->generator->insert_after(copy);
    134 	       entry->generator->dst = entry->tmp;
    135 	    }
    136 
    137 	    /* dest <- temp */
    138 	    fs_inst *copy = new(ralloc_parent(inst))
    139 	       fs_inst(BRW_OPCODE_MOV, inst->dst, entry->tmp);
    140 	    inst->replace_with(copy);
    141 
    142 	    /* Appending an instruction may have changed our bblock end. */
    143 	    if (inst == block->end) {
    144 	       block->end = copy;
    145 	    }
    146 
    147 	    /* Continue iteration with copy->next */
    148 	    inst = copy;
    149 	 }
    150       }
    151 
    152       /* Kill all AEB entries that use the destination. */
    153       foreach_list_safe(entry_node, aeb) {
    154 	 aeb_entry *entry = (aeb_entry *)entry_node;
    155 
    156 	 for (int i = 0; i < 3; i++) {
    157             if (inst->overwrites_reg(entry->generator->src[i])) {
    158 	       entry->remove();
    159 	       ralloc_free(entry);
    160 	       break;
    161 	    }
    162 	 }
    163       }
    164    }
    165 
    166    ralloc_free(mem_ctx);
    167 
    168    if (progress)
    169       this->live_intervals_valid = false;
    170 
    171    return progress;
    172 }
    173 
    174 bool
    175 fs_visitor::opt_cse()
    176 {
    177    bool progress = false;
    178 
    179    fs_cfg cfg(this);
    180 
    181    for (int b = 0; b < cfg.num_blocks; b++) {
    182       fs_bblock *block = cfg.blocks[b];
    183       exec_list aeb;
    184 
    185       progress = opt_cse_local(block, &aeb) || progress;
    186    }
    187 
    188    return progress;
    189 }
    190