Home | History | Annotate | Download | only in vc4
      1 /*
      2  * Copyright  2012 Intel Corporation
      3  * Copyright  2016 Broadcom
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the "Software"),
      7  * to deal in the Software without restriction, including without limitation
      8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9  * and/or sell copies of the Software, and to permit persons to whom the
     10  * Software is furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice (including the next
     13  * paragraph) shall be included in all copies or substantial portions of the
     14  * Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     22  * IN THE SOFTWARE.
     23  */
     24 
     25 #define MAX_INSTRUCTION (1 << 30)
     26 
     27 #include "util/ralloc.h"
     28 #include "util/register_allocate.h"
     29 #include "vc4_context.h"
     30 #include "vc4_qir.h"
     31 
     32 struct partial_update_state {
     33         struct qinst *insts[4];
     34         uint8_t channels;
     35 };
     36 
     37 static uint32_t
     38 int_hash(const void *key)
     39 {
     40         return _mesa_hash_data(key, sizeof(int));
     41 }
     42 
     43 static bool
     44 int_compare(const void *key1, const void *key2)
     45 {
     46         return *(const int *)key1 == *(const int *)key2;
     47 }
     48 
     49 static int
     50 qir_reg_to_var(struct qreg reg)
     51 {
     52         if (reg.file == QFILE_TEMP)
     53                 return reg.index;
     54 
     55         return -1;
     56 }
     57 
     58 static void
     59 qir_setup_use(struct vc4_compile *c, struct qblock *block, int ip,
     60               struct qreg src)
     61 {
     62         int var = qir_reg_to_var(src);
     63         if (var == -1)
     64                 return;
     65 
     66         c->temp_start[var] = MIN2(c->temp_start[var], ip);
     67         c->temp_end[var] = MAX2(c->temp_end[var], ip);
     68 
     69         /* The use[] bitset marks when the block makes
     70          * use of a variable without having completely
     71          * defined that variable within the block.
     72          */
     73         if (!BITSET_TEST(block->def, var))
     74                 BITSET_SET(block->use, var);
     75 }
     76 
     77 static struct partial_update_state *
     78 get_partial_update_state(struct hash_table *partial_update_ht,
     79                          struct qinst *inst)
     80 {
     81         struct hash_entry *entry =
     82                 _mesa_hash_table_search(partial_update_ht,
     83                                         &inst->dst.index);
     84         if (entry)
     85                 return entry->data;
     86 
     87         struct partial_update_state *state =
     88                 rzalloc(partial_update_ht, struct partial_update_state);
     89 
     90         _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);
     91 
     92         return state;
     93 }
     94 
     95 static void
     96 qir_setup_def(struct vc4_compile *c, struct qblock *block, int ip,
     97               struct hash_table *partial_update_ht, struct qinst *inst)
     98 {
     99         /* The def[] bitset marks when an initialization in a
    100          * block completely screens off previous updates of
    101          * that variable.
    102          */
    103         int var = qir_reg_to_var(inst->dst);
    104         if (var == -1)
    105                 return;
    106 
    107         c->temp_start[var] = MIN2(c->temp_start[var], ip);
    108         c->temp_end[var] = MAX2(c->temp_end[var], ip);
    109 
    110         /* If we've already tracked this as a def, or already used it within
    111          * the block, there's nothing to do.
    112          */
    113         if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
    114                 return;
    115 
    116         /* Easy, common case: unconditional full register update.
    117          *
    118          * We treat conditioning on the exec mask as the same as not being
    119          * conditional.  This makes sure that if the register gets set on
    120          * either side of an if, it is treated as being screened off before
    121          * the if.  Otherwise, if there was no intervening def, its live
    122          * interval doesn't extend back to the start of he program, and if too
    123          * many registers did that we'd fail to register allocate.
    124          */
    125         if ((inst->cond == QPU_COND_ALWAYS ||
    126              inst->cond_is_exec_mask) && !inst->dst.pack) {
    127                 BITSET_SET(block->def, var);
    128                 return;
    129         }
    130 
    131         /* Finally, look at the condition code and packing and mark it as a
    132          * def.  We need to make sure that we understand sequences
    133          * instructions like:
    134          *
    135          *     mov.zs t0, t1
    136          *     mov.zc t0, t2
    137          *
    138          * or:
    139          *
    140          *     mmov t0.8a, t1
    141          *     mmov t0.8b, t2
    142          *     mmov t0.8c, t3
    143          *     mmov t0.8d, t4
    144          *
    145          * as defining the temp within the block, because otherwise dst's live
    146          * range will get extended up the control flow to the top of the
    147          * program.
    148          */
    149         struct partial_update_state *state =
    150                 get_partial_update_state(partial_update_ht, inst);
    151         uint8_t mask = qir_channels_written(inst);
    152 
    153         if (inst->cond == QPU_COND_ALWAYS) {
    154                 state->channels |= mask;
    155         } else {
    156                 for (int i = 0; i < 4; i++) {
    157                         if (!(mask & (1 << i)))
    158                                 continue;
    159 
    160                         if (state->insts[i] &&
    161                             state->insts[i]->cond ==
    162                             qpu_cond_complement(inst->cond))
    163                                 state->channels |= 1 << i;
    164                         else
    165                                 state->insts[i] = inst;
    166                 }
    167         }
    168 
    169         if (state->channels == 0xf)
    170                 BITSET_SET(block->def, var);
    171 }
    172 
    173 static void
    174 sf_state_clear(struct hash_table *partial_update_ht)
    175 {
    176         struct hash_entry *entry;
    177 
    178         hash_table_foreach(partial_update_ht, entry) {
    179                 struct partial_update_state *state = entry->data;
    180 
    181                 for (int i = 0; i < 4; i++) {
    182                         if (state->insts[i] && state->insts[i]->cond)
    183                                 state->insts[i] = NULL;
    184                 }
    185         }
    186 }
    187 
    188 /* Sets up the def/use arrays for when variables are used-before-defined or
    189  * defined-before-used in the block.
    190  *
    191  * Also initializes the temp_start/temp_end to cover just the instruction IPs
    192  * where the variable is used, which will be extended later in
    193  * qir_compute_start_end().
    194  */
    195 static void
    196 qir_setup_def_use(struct vc4_compile *c)
    197 {
    198         struct hash_table *partial_update_ht =
    199                 _mesa_hash_table_create(c, int_hash, int_compare);
    200         int ip = 0;
    201 
    202         qir_for_each_block(block, c) {
    203                 block->start_ip = ip;
    204 
    205                 _mesa_hash_table_clear(partial_update_ht, NULL);
    206 
    207                 qir_for_each_inst(inst, block) {
    208                         for (int i = 0; i < qir_get_nsrc(inst); i++)
    209                                 qir_setup_use(c, block, ip, inst->src[i]);
    210 
    211                         qir_setup_def(c, block, ip, partial_update_ht, inst);
    212 
    213                         if (inst->sf)
    214                                 sf_state_clear(partial_update_ht);
    215 
    216                         switch (inst->op) {
    217                         case QOP_FRAG_Z:
    218                         case QOP_FRAG_W:
    219                                 /* The payload registers have values
    220                                  * implicitly loaded at the start of the
    221                                  * program.
    222                                  */
    223                                 if (inst->dst.file == QFILE_TEMP)
    224                                         c->temp_start[inst->dst.index] = 0;
    225                                 break;
    226                         default:
    227                                 break;
    228                         }
    229                         ip++;
    230                 }
    231                 block->end_ip = ip;
    232         }
    233 
    234         _mesa_hash_table_destroy(partial_update_ht, NULL);
    235 }
    236 
    237 static bool
    238 qir_live_variables_dataflow(struct vc4_compile *c, int bitset_words)
    239 {
    240         bool cont = false;
    241 
    242         qir_for_each_block_rev(block, c) {
    243                 /* Update live_out: Any successor using the variable
    244                  * on entrance needs us to have the variable live on
    245                  * exit.
    246                  */
    247                 qir_for_each_successor(succ, block) {
    248                         for (int i = 0; i < bitset_words; i++) {
    249                                 BITSET_WORD new_live_out = (succ->live_in[i] &
    250                                                             ~block->live_out[i]);
    251                                 if (new_live_out) {
    252                                         block->live_out[i] |= new_live_out;
    253                                         cont = true;
    254                                 }
    255                         }
    256                 }
    257 
    258                 /* Update live_in */
    259                 for (int i = 0; i < bitset_words; i++) {
    260                         BITSET_WORD new_live_in = (block->use[i] |
    261                                                    (block->live_out[i] &
    262                                                     ~block->def[i]));
    263                         if (new_live_in & ~block->live_in[i]) {
    264                                 block->live_in[i] |= new_live_in;
    265                                 cont = true;
    266                         }
    267                 }
    268         }
    269 
    270         return cont;
    271 }
    272 
    273 /**
    274  * Extend the start/end ranges for each variable to account for the
    275  * new information calculated from control flow.
    276  */
    277 static void
    278 qir_compute_start_end(struct vc4_compile *c, int num_vars)
    279 {
    280         qir_for_each_block(block, c) {
    281                 for (int i = 0; i < num_vars; i++) {
    282                         if (BITSET_TEST(block->live_in, i)) {
    283                                 c->temp_start[i] = MIN2(c->temp_start[i],
    284                                                         block->start_ip);
    285                                 c->temp_end[i] = MAX2(c->temp_end[i],
    286                                                       block->start_ip);
    287                         }
    288 
    289                         if (BITSET_TEST(block->live_out, i)) {
    290                                 c->temp_start[i] = MIN2(c->temp_start[i],
    291                                                         block->end_ip);
    292                                 c->temp_end[i] = MAX2(c->temp_end[i],
    293                                                       block->end_ip);
    294                         }
    295                 }
    296         }
    297 }
    298 
    299 void
    300 qir_calculate_live_intervals(struct vc4_compile *c)
    301 {
    302         int bitset_words = BITSET_WORDS(c->num_temps);
    303 
    304         /* If we called this function more than once, then we should be
    305          * freeing the previous arrays.
    306          */
    307         assert(!c->temp_start);
    308 
    309         c->temp_start = rzalloc_array(c, int, c->num_temps);
    310         c->temp_end = rzalloc_array(c, int, c->num_temps);
    311 
    312         for (int i = 0; i < c->num_temps; i++) {
    313                 c->temp_start[i] = MAX_INSTRUCTION;
    314                 c->temp_end[i] = -1;
    315         }
    316 
    317         qir_for_each_block(block, c) {
    318                 block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
    319                 block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
    320                 block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
    321                 block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
    322         }
    323 
    324         qir_setup_def_use(c);
    325 
    326         while (qir_live_variables_dataflow(c, bitset_words))
    327                 ;
    328 
    329         qir_compute_start_end(c, c->num_temps);
    330 }
    331