/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

extern "C" {
#include "main/macros.h"
#include "program/register_allocate.h"
} /* extern "C" */

#include "brw_vec4.h"
#include "glsl/ir_print_visitor.h"

using namespace brw;

namespace brw {

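/* Rewrite a register reference in place: if it refers to a virtual GRF,
 * replace the virtual register number with the hardware GRF chosen for it.
 */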
static void
assign(unsigned int *reg_hw_locations, reg *reg)
{
   if (reg->file == GRF) {
      reg->reg = reg_hw_locations[reg->reg];
   }
}

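/* Trivial allocator: give every virtual GRF that is still referenced its own
 * block of hardware GRFs, packed after the payload, without any liveness
 * analysis or register reuse.  It runs out of registers quickly, but is handy
 * for debugging (see the if (0) in reg_allocate()).
 */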
bool
vec4_visitor::reg_allocate_trivial()
{
   unsigned int hw_reg_mapping[this->virtual_grf_count];
   bool virtual_grf_used[this->virtual_grf_count];
   int i;
   int next;

   /* Calculate which virtual GRFs are actually in use after whatever
    * optimization passes have occurred.
    */
   for (int i = 0; i < this->virtual_grf_count; i++) {
      virtual_grf_used[i] = false;
   }

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)iter.get();

      if (inst->dst.file == GRF)
         virtual_grf_used[inst->dst.reg] = true;

      for (int i = 0; i < 3; i++) {
         if (inst->src[i].file == GRF)
            virtual_grf_used[inst->src[i].reg] = true;
      }
   }

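   /* Hand out hardware GRFs sequentially, starting right after the payload.
    * Unused virtual GRFs (other than GRF 0) simply don't advance the cursor.
    */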
   hw_reg_mapping[0] = this->first_non_payload_grf;
   next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
   for (i = 1; i < this->virtual_grf_count; i++) {
      if (virtual_grf_used[i]) {
         hw_reg_mapping[i] = next;
         next += this->virtual_grf_sizes[i];
      }
   }
   prog_data->total_grf = next;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)iter.get();

      assign(hw_reg_mapping, &inst->dst);
      assign(hw_reg_mapping, &inst->src[0]);
      assign(hw_reg_mapping, &inst->src[1]);
      assign(hw_reg_mapping, &inst->src[2]);
   }

   if (prog_data->total_grf > max_grf) {
      fail("Ran out of regs on trivial allocator (%d/%d)\n",
           prog_data->total_grf, max_grf);
      return false;
   }

   return true;
}

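/* Build the register set handed to the register allocator.
 *
 * Each class of size class_sizes[i] gets one allocator register for every
 * possible placement of a class_sizes[i]-wide contiguous allocation within
 * base_reg_count GRFs, i.e. base_reg_count - (class_sizes[i] - 1) of them.
 * For example, with base_reg_count = 96 and class sizes {1, 4}, the set
 * would contain 96 + 93 allocator registers.  Conflicts are added so that
 * overlapping placements can't be chosen at the same time.
 */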
static void
brw_alloc_reg_set_for_classes(struct brw_context *brw,
                              int *class_sizes,
                              int class_count,
                              int base_reg_count)
{
   /* Compute the total number of registers across all classes. */
   int ra_reg_count = 0;
   for (int i = 0; i < class_count; i++) {
      ra_reg_count += base_reg_count - (class_sizes[i] - 1);
   }

   ralloc_free(brw->vs.ra_reg_to_grf);
   brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
   ralloc_free(brw->vs.regs);
   brw->vs.regs = ra_alloc_reg_set(brw, ra_reg_count);
   ralloc_free(brw->vs.classes);
   brw->vs.classes = ralloc_array(brw, int, class_count + 1);

   /* Now, add the registers to their classes, and add the conflicts
    * between them and the base GRF registers (and also each other).
    */
   int reg = 0;
   for (int i = 0; i < class_count; i++) {
      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
      brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs);

      for (int j = 0; j < class_reg_count; j++) {
         ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg);

         brw->vs.ra_reg_to_grf[reg] = j;

         for (int base_reg = j;
              base_reg < j + class_sizes[i];
              base_reg++) {
            ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg);
         }

         reg++;
      }
   }
   assert(reg == ra_reg_count);

   ra_set_finalize(brw->vs.regs);
}

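/* Assign hardware GRFs to the shader's virtual GRFs using graph-coloring
 * register allocation (program/register_allocate.h).  On failure, a register
 * is chosen and spilled to scratch space and false is returned; the caller is
 * expected to retry until allocation succeeds or compilation fails.
 */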
bool
vec4_visitor::reg_allocate()
{
   unsigned int hw_reg_mapping[virtual_grf_count];
   int first_assigned_grf = this->first_non_payload_grf;
   int base_reg_count = max_grf - first_assigned_grf;
   int class_sizes[base_reg_count];
   int class_count = 0;

   /* Using the trivial allocator can be useful in debugging undefined
    * register access as a result of broken optimization passes.
    */
   if (0)
      return reg_allocate_trivial();

   calculate_live_intervals();

   /* Set up the register classes.
    *
    * The base registers store a vec4.  However, we'll need larger
    * storage for arrays, structures, and matrices, which will be sets
    * of contiguous registers.
    */
   class_sizes[class_count++] = 1;

   for (int r = 0; r < virtual_grf_count; r++) {
      int i;

      for (i = 0; i < class_count; i++) {
         if (class_sizes[i] == this->virtual_grf_sizes[r])
            break;
      }
      if (i == class_count) {
         if (this->virtual_grf_sizes[r] >= base_reg_count) {
            fail("Object too large to register allocate.\n");
         }

         class_sizes[class_count++] = this->virtual_grf_sizes[r];
      }
   }

   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count);

   struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs,
                                                    virtual_grf_count);

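   /* Put each virtual GRF in the class matching its size, and add an
    * interference edge between every pair of virtual GRFs whose live
    * ranges overlap.
    */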
   for (int i = 0; i < virtual_grf_count; i++) {
      for (int c = 0; c < class_count; c++) {
         if (class_sizes[c] == this->virtual_grf_sizes[i]) {
            ra_set_node_class(g, i, brw->vs.classes[c]);
            break;
         }
      }

      for (int j = 0; j < i; j++) {
         if (virtual_grf_interferes(i, j)) {
            ra_add_node_interference(g, i, j);
         }
      }
   }

   if (!ra_allocate_no_spills(g)) {
      /* Failed to allocate registers.  Spill a reg, and the caller will
       * loop back into here to try again.
       */
      int reg = choose_spill_reg(g);
      if (reg == -1) {
         fail("no register to spill\n");
      } else {
         spill_reg(reg);
      }
      ralloc_free(g);
      return false;
   }

   /* Get the chosen virtual registers for each node, and map virtual
    * regs in the register classes back down to real hardware reg
    * numbers.
    */
   prog_data->total_grf = first_assigned_grf;
   for (int i = 0; i < virtual_grf_count; i++) {
      int reg = ra_get_node_reg(g, i);

      hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg];
      prog_data->total_grf = MAX2(prog_data->total_grf,
                                  hw_reg_mapping[i] + virtual_grf_sizes[i]);
   }

   foreach_list(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      assign(hw_reg_mapping, &inst->dst);
      assign(hw_reg_mapping, &inst->src[0]);
      assign(hw_reg_mapping, &inst->src[1]);
      assign(hw_reg_mapping, &inst->src[2]);
   }

   ralloc_free(g);

   return true;
}

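/* Estimate, for each virtual GRF, how expensive it would be to spill it to
 * scratch space, and mark the registers that must not be spilled at all
 * (multi-register allocations, reladdr accesses, and registers already used
 * by scratch reads/writes).
 */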
void
vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
{
   float loop_scale = 1.0;

   for (int i = 0; i < this->virtual_grf_count; i++) {
      spill_costs[i] = 0.0;
      no_spill[i] = virtual_grf_sizes[i] != 1;
   }

   /* Calculate costs for spilling nodes.  Call it a cost of 1 per
    * spill/unspill we'll have to do, and guess that the insides of
    * loops run 10 times.
    */
   foreach_list(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *) node;

      for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == GRF) {
            spill_costs[inst->src[i].reg] += loop_scale;
            if (inst->src[i].reladdr)
               no_spill[inst->src[i].reg] = true;
         }
      }

      if (inst->dst.file == GRF) {
         spill_costs[inst->dst.reg] += loop_scale;
         if (inst->dst.reladdr)
            no_spill[inst->dst.reg] = true;
      }

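      /* Opcode-specific handling: scale costs up inside loops, and mark the
       * operands of existing scratch accesses as unspillable.
       */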
      switch (inst->opcode) {

      case BRW_OPCODE_DO:
         loop_scale *= 10;
         break;

      case BRW_OPCODE_WHILE:
         loop_scale /= 10;
         break;

      case VS_OPCODE_SCRATCH_READ:
      case VS_OPCODE_SCRATCH_WRITE:
         for (int i = 0; i < 3; i++) {
            if (inst->src[i].file == GRF)
               no_spill[inst->src[i].reg] = true;
         }
         if (inst->dst.file == GRF)
            no_spill[inst->dst.reg] = true;
         break;

      default:
         break;
      }
   }
}

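/* Pick the virtual GRF that the register allocator considers the best spill
 * candidate, based on the costs computed above.  Returns -1 if nothing can
 * be spilled.
 */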
int
vec4_visitor::choose_spill_reg(struct ra_graph *g)
{
   float spill_costs[this->virtual_grf_count];
   bool no_spill[this->virtual_grf_count];

   evaluate_spill_costs(spill_costs, no_spill);

   for (int i = 0; i < this->virtual_grf_count; i++) {
      if (!no_spill[i])
         ra_set_node_spill_cost(g, i, spill_costs[i]);
   }

   return ra_get_best_spill_node(g);
}

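/* Rewrite every use of the spilled virtual GRF to go through scratch space:
 * each read gets a fresh temporary filled by a scratch read, and each write
 * goes to a fresh temporary followed by a scratch write.  Live intervals are
 * invalidated since the instruction stream has changed.
 */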
void
vec4_visitor::spill_reg(int spill_reg_nr)
{
   assert(virtual_grf_sizes[spill_reg_nr] == 1);
   unsigned int spill_offset = c->last_scratch++;

   /* Generate spill/unspill instructions for the objects being spilled. */
   foreach_list(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *) node;

      for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) {
            src_reg spill_reg = inst->src[i];
            inst->src[i].reg = virtual_grf_alloc(1);
            dst_reg temp = dst_reg(inst->src[i]);

            /* Only read the necessary channels, to avoid overwriting the rest
             * with data that may not have been written to scratch.
             */
            temp.writemask = 0;
            for (int c = 0; c < 4; c++)
               temp.writemask |= (1 << BRW_GET_SWZ(inst->src[i].swizzle, c));
            assert(temp.writemask != 0);

            emit_scratch_read(inst, temp, spill_reg, spill_offset);
         }
      }

      if (inst->dst.file == GRF && inst->dst.reg == spill_reg_nr) {
         dst_reg spill_reg = inst->dst;
         inst->dst.reg = virtual_grf_alloc(1);

         /* We don't want a swizzle when reading from the source; read the
          * whole register and use spill_reg's writemask to select which
          * channels to write.
          */
         src_reg temp = src_reg(inst->dst);
         temp.swizzle = BRW_SWIZZLE_XYZW;
         emit_scratch_write(inst, temp, spill_reg, spill_offset);
      }
   }

   this->live_intervals_valid = false;
}

} /* namespace brw */