Home | History | Annotate | Download | only in i965
      1 /*
      2  * Copyright © 2014 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include "brw_fs.h"
     25 #include "brw_cfg.h"
     26 #include "brw_eu.h"
     27 
     28 /** @file brw_fs_cmod_propagation.cpp
     29  *
     30  * Implements a pass that propagates the conditional modifier from a CMP x 0.0
     31  * instruction into the instruction that generated x. For instance, in this
     32  * sequence
     33  *
     34  *    add(8)          g70<1>F    g69<8,8,1>F    4096F
     35  *    cmp.ge.f0(8)    null       g70<8,8,1>F    0F
     36  *
     37  * we can do the comparison as part of the ADD instruction directly:
     38  *
     39  *    add.ge.f0(8)    g70<1>F    g69<8,8,1>F    4096F
     40  *
     41  * If there had been a use of the flag register and another CMP using g70
     42  *
     43  *    add.ge.f0(8)    g70<1>F    g69<8,8,1>F    4096F
     44  *    (+f0) sel(8)    g71<1>F    g72<8,8,1>F    g73<8,8,1>F
     45  *    cmp.ge.f0(8)    null       g70<8,8,1>F    0F
     46  *
     47  * we can recognize that the CMP is generating the flag value that already
     48  * exists and therefore remove the instruction.
     49  */
     50 
     51 static bool
     52 opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)
     53 {
     54    bool progress = false;
     55    int ip = block->end_ip + 1;
     56 
     57    foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
     58       ip--;
     59 
     60       if ((inst->opcode != BRW_OPCODE_AND &&
     61            inst->opcode != BRW_OPCODE_CMP &&
     62            inst->opcode != BRW_OPCODE_MOV) ||
     63           inst->predicate != BRW_PREDICATE_NONE ||
     64           !inst->dst.is_null() ||
     65           inst->src[0].file != VGRF ||
     66           inst->src[0].abs)
     67          continue;
     68 
     69       /* Only an AND.NZ can be propagated.  Many AND.Z instructions are
     70        * generated (for ir_unop_not in fs_visitor::emit_bool_to_cond_code).
     71        * Propagating those would require inverting the condition on the CMP.
     72        * This changes both the flag value and the register destination of the
     73        * CMP.  That result may be used elsewhere, so we can't change its value
     74        * on a whim.
     75        */
     76       if (inst->opcode == BRW_OPCODE_AND &&
     77           !(inst->src[1].is_one() &&
     78             inst->conditional_mod == BRW_CONDITIONAL_NZ &&
     79             !inst->src[0].negate))
     80          continue;
     81 
     82       if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero())
     83          continue;
     84 
     85       if (inst->opcode == BRW_OPCODE_MOV &&
     86           inst->conditional_mod != BRW_CONDITIONAL_NZ)
     87          continue;
     88 
     89       bool read_flag = false;
     90       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
     91          if (regions_overlap(scan_inst->dst, scan_inst->size_written,
     92                              inst->src[0], inst->size_read(0))) {
     93             if (scan_inst->is_partial_write() ||
     94                 scan_inst->dst.offset != inst->src[0].offset ||
     95                 scan_inst->exec_size != inst->exec_size)
     96                break;
     97 
     98             /* CMP's result is the same regardless of dest type. */
     99             if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
    100                 scan_inst->opcode == BRW_OPCODE_CMP &&
    101                 (inst->dst.type == BRW_REGISTER_TYPE_D ||
    102                  inst->dst.type == BRW_REGISTER_TYPE_UD)) {
    103                inst->remove(block);
    104                progress = true;
    105                break;
    106             }
    107 
    108             /* If the AND wasn't handled by the previous case, it isn't safe
    109              * to remove it.
    110              */
    111             if (inst->opcode == BRW_OPCODE_AND)
    112                break;
    113 
    114             /* Comparisons operate differently for ints and floats */
    115             if (scan_inst->dst.type != inst->dst.type &&
    116                 (scan_inst->dst.type == BRW_REGISTER_TYPE_F ||
    117                  inst->dst.type == BRW_REGISTER_TYPE_F))
    118                break;
    119 
    120             /* If the instruction generating inst's source also wrote the
    121              * flag, and inst is doing a simple .nz comparison, then inst
    122              * is redundant - the appropriate value is already in the flag
    123              * register.  Delete inst.
    124              */
    125             if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
    126                 !inst->src[0].negate &&
    127                 scan_inst->flags_written()) {
    128                inst->remove(block);
    129                progress = true;
    130                break;
    131             }
    132 
    133             /* The conditional mod of the CMP/CMPN instructions behaves
    134              * specially because the flag output is not calculated from the
    135              * result of the instruction, but the other way around, which
    136              * means that even if the condmod to propagate and the condmod
    137              * from the CMP instruction are the same they will in general give
    138              * different results because they are evaluated based on different
    139              * inputs.
    140              */
    141             if (scan_inst->opcode == BRW_OPCODE_CMP ||
    142                 scan_inst->opcode == BRW_OPCODE_CMPN)
    143                break;
    144 
    145             /* Otherwise, try propagating the conditional. */
    146             enum brw_conditional_mod cond =
    147                inst->src[0].negate ? brw_swap_cmod(inst->conditional_mod)
    148                                    : inst->conditional_mod;
    149 
    150             if (scan_inst->can_do_cmod() &&
    151                 ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
    152                  scan_inst->conditional_mod == cond)) {
    153                scan_inst->conditional_mod = cond;
    154                inst->remove(block);
    155                progress = true;
    156             }
    157             break;
    158          }
    159 
    160          if (scan_inst->flags_written())
    161             break;
    162 
    163          read_flag = read_flag || scan_inst->flags_read(devinfo);
    164       }
    165    }
    166 
    167    return progress;
    168 }
    169 
    170 bool
    171 fs_visitor::opt_cmod_propagation()
    172 {
    173    bool progress = false;
    174 
    175    foreach_block_reverse(block, cfg) {
    176       progress = opt_cmod_propagation_local(devinfo, block) || progress;
    177    }
    178 
    179    if (progress)
    180       invalidate_live_intervals();
    181 
    182    return progress;
    183 }
    184