Home | History | Annotate | Download | only in program
      1 /*
      2  * Copyright  2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *    Eric Anholt <eric (at) anholt.net>
     25  *
     26  */
     27 
     28 /** @file register_allocate.c
     29  *
     30  * Graph-coloring register allocator.
     31  *
     32  * The basic idea of graph coloring is to make a node in a graph for
     33  * every thing that needs a register (color) number assigned, and make
     34  * edges in the graph between nodes that interfere (can't be allocated
     35  * to the same register at the same time).
     36  *
     37  * During the "simplify" process, any any node with fewer edges than
     38  * there are registers means that that edge can get assigned a
     39  * register regardless of what its neighbors choose, so that node is
     40  * pushed on a stack and removed (with its edges) from the graph.
     41  * That likely causes other nodes to become trivially colorable as well.
     42  *
     43  * Then during the "select" process, nodes are popped off of that
     44  * stack, their edges restored, and assigned a color different from
     45  * their neighbors.  Because they were pushed on the stack only when
     46  * they were trivially colorable, any color chosen won't interfere
     47  * with the registers to be popped later.
     48  *
     49  * The downside to most graph coloring is that real hardware often has
     50  * limitations, like registers that need to be allocated to a node in
     51  * pairs, or aligned on some boundary.  This implementation follows
     52  * the paper "Retargetable Graph-Coloring Register Allocation for
     53  * Irregular Architectures" by Johan Runeson and Sven-Olof Nystrm.
     54  *
     55  * In this system, there are register classes each containing various
     56  * registers, and registers may interfere with other registers.  For
     57  * example, one might have a class of base registers, and a class of
     58  * aligned register pairs that would each interfere with their pair of
     59  * the base registers.  Each node has a register class it needs to be
     60  * assigned to.  Define p(B) to be the size of register class B, and
     61  * q(B,C) to be the number of registers in B that the worst choice
     62  * register in C could conflict with.  Then, this system replaces the
     63  * basic graph coloring test of "fewer edges from this node than there
     64  * are registers" with "For this node of class B, the sum of q(B,C)
     65  * for each neighbor node of class C is less than pB".
     66  *
     67  * A nice feature of the pq test is that q(B,C) can be computed once
     68  * up front and stored in a 2-dimensional array, so that the cost of
     69  * coloring a node is constant with the number of registers.  We do
     70  * this during ra_set_finalize().
     71  */
     72 
     73 #include <ralloc.h>
     74 
     75 #include "main/imports.h"
     76 #include "main/macros.h"
     77 #include "main/mtypes.h"
     78 #include "register_allocate.h"
     79 
     80 #define NO_REG ~0
     81 
     82 struct ra_reg {
     83    GLboolean *conflicts;
     84    unsigned int *conflict_list;
     85    unsigned int conflict_list_size;
     86    unsigned int num_conflicts;
     87 };
     88 
     89 struct ra_regs {
     90    struct ra_reg *regs;
     91    unsigned int count;
     92 
     93    struct ra_class **classes;
     94    unsigned int class_count;
     95 };
     96 
     97 struct ra_class {
     98    GLboolean *regs;
     99 
    100    /**
    101     * p(B) in Runeson/Nystrm paper.
    102     *
    103     * This is "how many regs are in the set."
    104     */
    105    unsigned int p;
    106 
    107    /**
    108     * q(B,C) (indexed by C, B is this register class) in
    109     * Runeson/Nystrm paper.  This is "how many registers of B could
    110     * the worst choice register from C conflict with".
    111     */
    112    unsigned int *q;
    113 };
    114 
    115 struct ra_node {
    116    /** @{
    117     *
    118     * List of which nodes this node interferes with.  This should be
    119     * symmetric with the other node.
    120     */
    121    GLboolean *adjacency;
    122    unsigned int *adjacency_list;
    123    unsigned int adjacency_count;
    124    /** @} */
    125 
    126    unsigned int class;
    127 
    128    /* Register, if assigned, or NO_REG. */
    129    unsigned int reg;
    130 
    131    /**
    132     * Set when the node is in the trivially colorable stack.  When
    133     * set, the adjacency to this node is ignored, to implement the
    134     * "remove the edge from the graph" in simplification without
    135     * having to actually modify the adjacency_list.
    136     */
    137    GLboolean in_stack;
    138 
    139    /* For an implementation that needs register spilling, this is the
    140     * approximate cost of spilling this node.
    141     */
    142    float spill_cost;
    143 };
    144 
    145 struct ra_graph {
    146    struct ra_regs *regs;
    147    /**
    148     * the variables that need register allocation.
    149     */
    150    struct ra_node *nodes;
    151    unsigned int count; /**< count of nodes. */
    152 
    153    unsigned int *stack;
    154    unsigned int stack_count;
    155 };
    156 
    157 /**
    158  * Creates a set of registers for the allocator.
    159  *
    160  * mem_ctx is a ralloc context for the allocator.  The reg set may be freed
    161  * using ralloc_free().
    162  */
    163 struct ra_regs *
    164 ra_alloc_reg_set(void *mem_ctx, unsigned int count)
    165 {
    166    unsigned int i;
    167    struct ra_regs *regs;
    168 
    169    regs = rzalloc(mem_ctx, struct ra_regs);
    170    regs->count = count;
    171    regs->regs = rzalloc_array(regs, struct ra_reg, count);
    172 
    173    for (i = 0; i < count; i++) {
    174       regs->regs[i].conflicts = rzalloc_array(regs->regs, GLboolean, count);
    175       regs->regs[i].conflicts[i] = GL_TRUE;
    176 
    177       regs->regs[i].conflict_list = ralloc_array(regs->regs, unsigned int, 4);
    178       regs->regs[i].conflict_list_size = 4;
    179       regs->regs[i].conflict_list[0] = i;
    180       regs->regs[i].num_conflicts = 1;
    181    }
    182 
    183    return regs;
    184 }
    185 
    186 static void
    187 ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2)
    188 {
    189    struct ra_reg *reg1 = &regs->regs[r1];
    190 
    191    if (reg1->conflict_list_size == reg1->num_conflicts) {
    192       reg1->conflict_list_size *= 2;
    193       reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list,
    194 				     unsigned int, reg1->conflict_list_size);
    195    }
    196    reg1->conflict_list[reg1->num_conflicts++] = r2;
    197    reg1->conflicts[r2] = GL_TRUE;
    198 }
    199 
    200 void
    201 ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)
    202 {
    203    if (!regs->regs[r1].conflicts[r2]) {
    204       ra_add_conflict_list(regs, r1, r2);
    205       ra_add_conflict_list(regs, r2, r1);
    206    }
    207 }
    208 
    209 /**
    210  * Adds a conflict between base_reg and reg, and also between reg and
    211  * anything that base_reg conflicts with.
    212  *
    213  * This can simplify code for setting up multiple register classes
    214  * which are aggregates of some base hardware registers, compared to
    215  * explicitly using ra_add_reg_conflict.
    216  */
    217 void
    218 ra_add_transitive_reg_conflict(struct ra_regs *regs,
    219 			       unsigned int base_reg, unsigned int reg)
    220 {
    221    int i;
    222 
    223    ra_add_reg_conflict(regs, reg, base_reg);
    224 
    225    for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) {
    226       ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]);
    227    }
    228 }
    229 
    230 unsigned int
    231 ra_alloc_reg_class(struct ra_regs *regs)
    232 {
    233    struct ra_class *class;
    234 
    235    regs->classes = reralloc(regs->regs, regs->classes, struct ra_class *,
    236 			    regs->class_count + 1);
    237 
    238    class = rzalloc(regs, struct ra_class);
    239    regs->classes[regs->class_count] = class;
    240 
    241    class->regs = rzalloc_array(class, GLboolean, regs->count);
    242 
    243    return regs->class_count++;
    244 }
    245 
    246 void
    247 ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int r)
    248 {
    249    struct ra_class *class = regs->classes[c];
    250 
    251    class->regs[r] = GL_TRUE;
    252    class->p++;
    253 }
    254 
    255 /**
    256  * Must be called after all conflicts and register classes have been
    257  * set up and before the register set is used for allocation.
    258  */
    259 void
    260 ra_set_finalize(struct ra_regs *regs)
    261 {
    262    unsigned int b, c;
    263 
    264    for (b = 0; b < regs->class_count; b++) {
    265       regs->classes[b]->q = ralloc_array(regs, unsigned int, regs->class_count);
    266    }
    267 
    268    /* Compute, for each class B and C, how many regs of B an
    269     * allocation to C could conflict with.
    270     */
    271    for (b = 0; b < regs->class_count; b++) {
    272       for (c = 0; c < regs->class_count; c++) {
    273 	 unsigned int rc;
    274 	 int max_conflicts = 0;
    275 
    276 	 for (rc = 0; rc < regs->count; rc++) {
    277 	    int conflicts = 0;
    278 	    int i;
    279 
    280 	    if (!regs->classes[c]->regs[rc])
    281 	       continue;
    282 
    283 	    for (i = 0; i < regs->regs[rc].num_conflicts; i++) {
    284 	       unsigned int rb = regs->regs[rc].conflict_list[i];
    285 	       if (regs->classes[b]->regs[rb])
    286 		  conflicts++;
    287 	    }
    288 	    max_conflicts = MAX2(max_conflicts, conflicts);
    289 	 }
    290 	 regs->classes[b]->q[c] = max_conflicts;
    291       }
    292    }
    293 }
    294 
    295 static void
    296 ra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned int n2)
    297 {
    298    g->nodes[n1].adjacency[n2] = GL_TRUE;
    299    g->nodes[n1].adjacency_list[g->nodes[n1].adjacency_count] = n2;
    300    g->nodes[n1].adjacency_count++;
    301 }
    302 
    303 struct ra_graph *
    304 ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count)
    305 {
    306    struct ra_graph *g;
    307    unsigned int i;
    308 
    309    g = rzalloc(regs, struct ra_graph);
    310    g->regs = regs;
    311    g->nodes = rzalloc_array(g, struct ra_node, count);
    312    g->count = count;
    313 
    314    g->stack = rzalloc_array(g, unsigned int, count);
    315 
    316    for (i = 0; i < count; i++) {
    317       g->nodes[i].adjacency = rzalloc_array(g, GLboolean, count);
    318       g->nodes[i].adjacency_list = ralloc_array(g, unsigned int, count);
    319       g->nodes[i].adjacency_count = 0;
    320       ra_add_node_adjacency(g, i, i);
    321       g->nodes[i].reg = NO_REG;
    322    }
    323 
    324    return g;
    325 }
    326 
    327 void
    328 ra_set_node_class(struct ra_graph *g,
    329 		  unsigned int n, unsigned int class)
    330 {
    331    g->nodes[n].class = class;
    332 }
    333 
    334 void
    335 ra_add_node_interference(struct ra_graph *g,
    336 			 unsigned int n1, unsigned int n2)
    337 {
    338    if (!g->nodes[n1].adjacency[n2]) {
    339       ra_add_node_adjacency(g, n1, n2);
    340       ra_add_node_adjacency(g, n2, n1);
    341    }
    342 }
    343 
    344 static GLboolean pq_test(struct ra_graph *g, unsigned int n)
    345 {
    346    unsigned int j;
    347    unsigned int q = 0;
    348    int n_class = g->nodes[n].class;
    349 
    350    for (j = 0; j < g->nodes[n].adjacency_count; j++) {
    351       unsigned int n2 = g->nodes[n].adjacency_list[j];
    352       unsigned int n2_class = g->nodes[n2].class;
    353 
    354       if (n != n2 && !g->nodes[n2].in_stack) {
    355 	 q += g->regs->classes[n_class]->q[n2_class];
    356       }
    357    }
    358 
    359    return q < g->regs->classes[n_class]->p;
    360 }
    361 
    362 /**
    363  * Simplifies the interference graph by pushing all
    364  * trivially-colorable nodes into a stack of nodes to be colored,
    365  * removing them from the graph, and rinsing and repeating.
    366  *
    367  * Returns GL_TRUE if all nodes were removed from the graph.  GL_FALSE
    368  * means that either spilling will be required, or optimistic coloring
    369  * should be applied.
    370  */
    371 GLboolean
    372 ra_simplify(struct ra_graph *g)
    373 {
    374    GLboolean progress = GL_TRUE;
    375    int i;
    376 
    377    while (progress) {
    378       progress = GL_FALSE;
    379 
    380       for (i = g->count - 1; i >= 0; i--) {
    381 	 if (g->nodes[i].in_stack || g->nodes[i].reg != NO_REG)
    382 	    continue;
    383 
    384 	 if (pq_test(g, i)) {
    385 	    g->stack[g->stack_count] = i;
    386 	    g->stack_count++;
    387 	    g->nodes[i].in_stack = GL_TRUE;
    388 	    progress = GL_TRUE;
    389 	 }
    390       }
    391    }
    392 
    393    for (i = 0; i < g->count; i++) {
    394       if (!g->nodes[i].in_stack)
    395 	 return GL_FALSE;
    396    }
    397 
    398    return GL_TRUE;
    399 }
    400 
    401 /**
    402  * Pops nodes from the stack back into the graph, coloring them with
    403  * registers as they go.
    404  *
    405  * If all nodes were trivially colorable, then this must succeed.  If
    406  * not (optimistic coloring), then it may return GL_FALSE;
    407  */
    408 GLboolean
    409 ra_select(struct ra_graph *g)
    410 {
    411    int i;
    412 
    413    while (g->stack_count != 0) {
    414       unsigned int r;
    415       int n = g->stack[g->stack_count - 1];
    416       struct ra_class *c = g->regs->classes[g->nodes[n].class];
    417 
    418       /* Find the lowest-numbered reg which is not used by a member
    419        * of the graph adjacent to us.
    420        */
    421       for (r = 0; r < g->regs->count; r++) {
    422 	 if (!c->regs[r])
    423 	    continue;
    424 
    425 	 /* Check if any of our neighbors conflict with this register choice. */
    426 	 for (i = 0; i < g->nodes[n].adjacency_count; i++) {
    427 	    unsigned int n2 = g->nodes[n].adjacency_list[i];
    428 
    429 	    if (!g->nodes[n2].in_stack &&
    430 		g->regs->regs[r].conflicts[g->nodes[n2].reg]) {
    431 	       break;
    432 	    }
    433 	 }
    434 	 if (i == g->nodes[n].adjacency_count)
    435 	    break;
    436       }
    437       if (r == g->regs->count)
    438 	 return GL_FALSE;
    439 
    440       g->nodes[n].reg = r;
    441       g->nodes[n].in_stack = GL_FALSE;
    442       g->stack_count--;
    443    }
    444 
    445    return GL_TRUE;
    446 }
    447 
    448 /**
    449  * Optimistic register coloring: Just push the remaining nodes
    450  * on the stack.  They'll be colored first in ra_select(), and
    451  * if they succeed then the locally-colorable nodes are still
    452  * locally-colorable and the rest of the register allocation
    453  * will succeed.
    454  */
    455 void
    456 ra_optimistic_color(struct ra_graph *g)
    457 {
    458    unsigned int i;
    459 
    460    for (i = 0; i < g->count; i++) {
    461       if (g->nodes[i].in_stack || g->nodes[i].reg != NO_REG)
    462 	 continue;
    463 
    464       g->stack[g->stack_count] = i;
    465       g->stack_count++;
    466       g->nodes[i].in_stack = GL_TRUE;
    467    }
    468 }
    469 
    470 GLboolean
    471 ra_allocate_no_spills(struct ra_graph *g)
    472 {
    473    if (!ra_simplify(g)) {
    474       ra_optimistic_color(g);
    475    }
    476    return ra_select(g);
    477 }
    478 
    479 unsigned int
    480 ra_get_node_reg(struct ra_graph *g, unsigned int n)
    481 {
    482    return g->nodes[n].reg;
    483 }
    484 
    485 /**
    486  * Forces a node to a specific register.  This can be used to avoid
    487  * creating a register class containing one node when handling data
    488  * that must live in a fixed location and is known to not conflict
    489  * with other forced register assignment (as is common with shader
    490  * input data).  These nodes do not end up in the stack during
    491  * ra_simplify(), and thus at ra_select() time it is as if they were
    492  * the first popped off the stack and assigned their fixed locations.
    493  *
    494  * Must be called before ra_simplify().
    495  */
    496 void
    497 ra_set_node_reg(struct ra_graph *g, unsigned int n, unsigned int reg)
    498 {
    499    g->nodes[n].reg = reg;
    500    g->nodes[n].in_stack = GL_FALSE;
    501 }
    502 
    503 static float
    504 ra_get_spill_benefit(struct ra_graph *g, unsigned int n)
    505 {
    506    int j;
    507    float benefit = 0;
    508    int n_class = g->nodes[n].class;
    509 
    510    /* Define the benefit of eliminating an interference between n, n2
    511     * through spilling as q(C, B) / p(C).  This is similar to the
    512     * "count number of edges" approach of traditional graph coloring,
    513     * but takes classes into account.
    514     */
    515    for (j = 0; j < g->nodes[n].adjacency_count; j++) {
    516       unsigned int n2 = g->nodes[n].adjacency_list[j];
    517       if (n != n2) {
    518 	 unsigned int n2_class = g->nodes[n2].class;
    519 	 benefit += ((float)g->regs->classes[n_class]->q[n2_class] /
    520 		     g->regs->classes[n_class]->p);
    521       }
    522    }
    523 
    524    return benefit;
    525 }
    526 
    527 /**
    528  * Returns a node number to be spilled according to the cost/benefit using
    529  * the pq test, or -1 if there are no spillable nodes.
    530  */
    531 int
    532 ra_get_best_spill_node(struct ra_graph *g)
    533 {
    534    unsigned int best_node = -1;
    535    unsigned int best_benefit = 0.0;
    536    unsigned int n;
    537 
    538    for (n = 0; n < g->count; n++) {
    539       float cost = g->nodes[n].spill_cost;
    540       float benefit;
    541 
    542       if (cost <= 0.0)
    543 	 continue;
    544 
    545       benefit = ra_get_spill_benefit(g, n);
    546 
    547       if (benefit / cost > best_benefit) {
    548 	 best_benefit = benefit / cost;
    549 	 best_node = n;
    550       }
    551    }
    552 
    553    return best_node;
    554 }
    555 
    556 /**
    557  * Only nodes with a spill cost set (cost != 0.0) will be considered
    558  * for register spilling.
    559  */
    560 void
    561 ra_set_node_spill_cost(struct ra_graph *g, unsigned int n, float cost)
    562 {
    563    g->nodes[n].spill_cost = cost;
    564 }
    565