Home | History | Annotate | Download | only in vc4
      1 /*
      2  * Copyright © 2014 Broadcom
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * @file vc4_opt_algebraic.c
     26  *
     27  * This is the optimization pass for miscellaneous changes to instructions
     28  * where we can simplify the operation by some knowledge about the specific
     29  * operations.
     30  *
     31  * Mostly this will be a matter of turning things into MOVs so that they can
     32  * later be copy-propagated out.
     33  */
     34 
     35 #include "vc4_qir.h"
     36 #include "util/u_math.h"
     37 
     38 static bool debug;
     39 
     40 static void
     41 dump_from(struct vc4_compile *c, struct qinst *inst)
     42 {
     43         if (!debug)
     44                 return;
     45 
     46         fprintf(stderr, "optimizing: ");
     47         qir_dump_inst(c, inst);
     48         fprintf(stderr, "\n");
     49 }
     50 
     51 static void
     52 dump_to(struct vc4_compile *c, struct qinst *inst)
     53 {
     54         if (!debug)
     55                 return;
     56 
     57         fprintf(stderr, "to: ");
     58         qir_dump_inst(c, inst);
     59         fprintf(stderr, "\n");
     60 }
     61 
     62 static bool
     63 is_constant_value(struct vc4_compile *c, struct qreg reg,
     64                   uint32_t val)
     65 {
     66         if (reg.file == QFILE_UNIF &&
     67             !reg.pack &&
     68             c->uniform_contents[reg.index] == QUNIFORM_CONSTANT &&
     69             c->uniform_data[reg.index] == val) {
     70                 return true;
     71         }
     72 
     73         if (reg.file == QFILE_SMALL_IMM && reg.index == val)
     74                 return true;
     75 
     76         return false;
     77 }
     78 
     79 static bool
     80 is_zero(struct vc4_compile *c, struct qreg reg)
     81 {
     82         reg = qir_follow_movs(c, reg);
     83         return is_constant_value(c, reg, 0);
     84 }
     85 
     86 static bool
     87 is_1f(struct vc4_compile *c, struct qreg reg)
     88 {
     89         reg = qir_follow_movs(c, reg);
     90         return is_constant_value(c, reg, fui(1.0));
     91 }
     92 
     93 static void
     94 replace_with_mov(struct vc4_compile *c, struct qinst *inst, struct qreg arg)
     95 {
     96         dump_from(c, inst);
     97 
     98         inst->src[0] = arg;
     99         if (qir_has_implicit_tex_uniform(inst))
    100                 inst->src[1] = inst->src[qir_get_tex_uniform_src(inst)];
    101 
    102         if (qir_is_mul(inst))
    103                 inst->op = QOP_MMOV;
    104         else if (qir_is_float_input(inst))
    105                 inst->op = QOP_FMOV;
    106         else
    107                 inst->op = QOP_MOV;
    108         dump_to(c, inst);
    109 }
    110 
    111 static bool
    112 replace_x_0_with_x(struct vc4_compile *c,
    113                  struct qinst *inst,
    114                  int arg)
    115 {
    116         if (!is_zero(c, inst->src[arg]))
    117                 return false;
    118         replace_with_mov(c, inst, inst->src[1 - arg]);
    119         return true;
    120 }
    121 
    122 static bool
    123 replace_x_0_with_0(struct vc4_compile *c,
    124                   struct qinst *inst,
    125                   int arg)
    126 {
    127         if (!is_zero(c, inst->src[arg]))
    128                 return false;
    129         replace_with_mov(c, inst, inst->src[arg]);
    130         return true;
    131 }
    132 
    133 static bool
    134 fmul_replace_one(struct vc4_compile *c,
    135                  struct qinst *inst,
    136                  int arg)
    137 {
    138         if (!is_1f(c, inst->src[arg]))
    139                 return false;
    140         replace_with_mov(c, inst, inst->src[1 - arg]);
    141         return true;
    142 }
    143 
    144 bool
    145 qir_opt_algebraic(struct vc4_compile *c)
    146 {
    147         bool progress = false;
    148 
    149         qir_for_each_inst_inorder(inst, c) {
    150                 switch (inst->op) {
    151                 case QOP_FMIN:
    152                         if (is_1f(c, inst->src[1]) &&
    153                             inst->src[0].pack >= QPU_UNPACK_8D_REP &&
    154                             inst->src[0].pack <= QPU_UNPACK_8D) {
    155                                 replace_with_mov(c, inst, inst->src[0]);
    156                                 progress = true;
    157                         }
    158                         break;
    159 
    160                 case QOP_FMAX:
    161                         if (is_zero(c, inst->src[1]) &&
    162                             inst->src[0].pack >= QPU_UNPACK_8D_REP &&
    163                             inst->src[0].pack <= QPU_UNPACK_8D) {
    164                                 replace_with_mov(c, inst, inst->src[0]);
    165                                 progress = true;
    166                         }
    167                         break;
    168 
    169                 case QOP_FSUB:
    170                 case QOP_SUB:
    171                         if (is_zero(c, inst->src[1])) {
    172                                 replace_with_mov(c, inst, inst->src[0]);
    173                                 progress = true;
    174                         }
    175                         break;
    176 
    177                 case QOP_ADD:
    178                         /* Kernel validation requires that we use an actual
    179                          * add instruction.
    180                          */
    181                         if (inst->dst.file != QFILE_TEX_S_DIRECT &&
    182                             (replace_x_0_with_x(c, inst, 0) ||
    183                              replace_x_0_with_x(c, inst, 1))) {
    184                                 progress = true;
    185                                 break;
    186                         }
    187                         break;
    188 
    189                 case QOP_FADD:
    190                         if (replace_x_0_with_x(c, inst, 0) ||
    191                             replace_x_0_with_x(c, inst, 1)) {
    192                                 progress = true;
    193                                 break;
    194                         }
    195 
    196                         /* FADD(a, FSUB(0, b)) -> FSUB(a, b) */
    197                         if (inst->src[1].file == QFILE_TEMP &&
    198                             c->defs[inst->src[1].index] &&
    199                             c->defs[inst->src[1].index]->op == QOP_FSUB) {
    200                                 struct qinst *fsub = c->defs[inst->src[1].index];
    201                                 if (is_zero(c, fsub->src[0])) {
    202                                         dump_from(c, inst);
    203                                         inst->op = QOP_FSUB;
    204                                         inst->src[1] = fsub->src[1];
    205                                         progress = true;
    206                                         dump_to(c, inst);
    207                                         break;
    208                                 }
    209                         }
    210 
    211                         /* FADD(FSUB(0, b), a) -> FSUB(a, b) */
    212                         if (inst->src[0].file == QFILE_TEMP &&
    213                             c->defs[inst->src[0].index] &&
    214                             c->defs[inst->src[0].index]->op == QOP_FSUB) {
    215                                 struct qinst *fsub = c->defs[inst->src[0].index];
    216                                 if (is_zero(c, fsub->src[0])) {
    217                                         dump_from(c, inst);
    218                                         inst->op = QOP_FSUB;
    219                                         inst->src[0] = inst->src[1];
    220                                         inst->src[1] = fsub->src[1];
    221                                         dump_to(c, inst);
    222                                         progress = true;
    223                                         break;
    224                                 }
    225                         }
    226                         break;
    227 
    228                 case QOP_FMUL:
    229                         if (!inst->dst.pack &&
    230                             (replace_x_0_with_0(c, inst, 0) ||
    231                              replace_x_0_with_0(c, inst, 1) ||
    232                              fmul_replace_one(c, inst, 0) ||
    233                              fmul_replace_one(c, inst, 1))) {
    234                                 progress = true;
    235                                 break;
    236                         }
    237                         break;
    238 
    239                 case QOP_MUL24:
    240                         if (!inst->dst.pack &&
    241                             (replace_x_0_with_0(c, inst, 0) ||
    242                              replace_x_0_with_0(c, inst, 1))) {
    243                                 progress = true;
    244                                 break;
    245                         }
    246                         break;
    247 
    248                 case QOP_AND:
    249                         if (replace_x_0_with_0(c, inst, 0) ||
    250                             replace_x_0_with_0(c, inst, 1)) {
    251                                 progress = true;
    252                                 break;
    253                         }
    254 
    255                         if (is_constant_value(c, inst->src[0], ~0)) {
    256                                 replace_with_mov(c, inst, inst->src[1]);
    257                                 progress = true;
    258                                 break;
    259                         }
    260                         if (is_constant_value(c, inst->src[1], ~0)) {
    261                                 replace_with_mov(c, inst, inst->src[0]);
    262                                 progress = true;
    263                                 break;
    264                         }
    265                         break;
    266 
    267                 case QOP_OR:
    268                         if (replace_x_0_with_x(c, inst, 0) ||
    269                             replace_x_0_with_x(c, inst, 1)) {
    270                                 progress = true;
    271                                 break;
    272                         }
    273                         break;
    274 
    275                 case QOP_RCP:
    276                         if (is_1f(c, inst->src[0])) {
    277                                 replace_with_mov(c, inst, inst->src[0]);
    278                                 progress = true;
    279                                 break;
    280                         }
    281                         break;
    282 
    283                 default:
    284                         break;
    285                 }
    286         }
    287 
    288         return progress;
    289 }
    290