/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */

#include <stdlib.h> /* for qsort() */
#include "toy_compiler.h"
#include "toy_legalize.h"
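
/*
 * This pass maps virtual registers (VRFs) to hardware GRFs.  Live intervals
 * are computed per VRF, sorted by startpoint, and walked in the classic
 * linear-scan fashion: intervals that expire return their registers to a
 * free pool, and consecutive-register constraints (e.g. SEND writing
 * multiple registers) are handled by allocating whole chunks at once.
 * Spilling is not implemented.
 */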

/**
 * Live interval of a VRF register.
 */
struct linear_scan_live_interval {
   int vrf;
   int startpoint;
   int endpoint;

   /*
    * should this interval be assigned the register immediately following
    * the previous interval's?
    */
   bool consecutive;

   int reg;

   struct list_head list;
};

/**
 * Linear scan.
 */
struct linear_scan {
   struct linear_scan_live_interval *intervals;
   int max_vrf, num_vrfs;

   int num_regs;

   struct list_head active_list;
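   /*
    * stack of free registers: popped from the tail for single registers,
    * and lazily re-sorted into descending order when a consecutive chunk
    * is needed
    */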
   int *free_regs;
   int num_free_regs;

   int *vrf_mapping;
};

/**
 * Return a chunk of registers to the free register pool.
 */
static void
linear_scan_free_regs(struct linear_scan *ls, int reg, int count)
{
   int i;

   /* push the highest register first so the chunk stays in descending order */
   for (i = 0; i < count; i++)
      ls->free_regs[ls->num_free_regs++] = reg + count - 1 - i;
}

static int
linear_scan_compare_regs(const void *elem1, const void *elem2)
{
   const int *reg1 = elem1;
   const int *reg2 = elem2;

   /* in reverse order */
   return (*reg2 - *reg1);
}

/**
 * Allocate a chunk of registers from the free register pool.
 */
static int
linear_scan_allocate_regs(struct linear_scan *ls, int count)
{
   bool sorted = false;
   int reg;

   /* simple cases */
   if (count > ls->num_free_regs)
      return -1;
   else if (count == 1)
      return ls->free_regs[--ls->num_free_regs];

   /* TODO a free register pool */
   /* TODO reserve some regs for spilling */
   while (true) {
      bool found = false;
      int start;

      /*
       * find a chunk of registers that have consecutive register
       * numbers
       */
      for (start = ls->num_free_regs - 1; start >= count - 1; start--) {
         int i;

         for (i = 1; i < count; i++) {
            if (ls->free_regs[start - i] != ls->free_regs[start] + i)
               break;
         }

         if (i >= count) {
            found = true;
            break;
         }
      }

      if (found) {
         reg = ls->free_regs[start];

         if (start != ls->num_free_regs - 1) {
            start++;
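            /* shift the entries above the chunk down to close the gap */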
            memmove(&ls->free_regs[start - count],
                    &ls->free_regs[start],
                    sizeof(*ls->free_regs) * (ls->num_free_regs - start));
         }
         ls->num_free_regs -= count;
         break;
      }
      else if (!sorted) {
         /* sort and retry */
         qsort(ls->free_regs, ls->num_free_regs, sizeof(*ls->free_regs),
               linear_scan_compare_regs);
         sorted = true;
      }
      else {
         /* failed */
         reg = -1;
         break;
      }
   }

   return reg;
}

/**
 * Add an interval to the active list.
 */
static void
linear_scan_add_active(struct linear_scan *ls,
                       struct linear_scan_live_interval *interval)
{
   struct linear_scan_live_interval *pos;

   /* keep the active list sorted by endpoints */
   LIST_FOR_EACH_ENTRY(pos, &ls->active_list, list) {
      if (pos->endpoint >= interval->endpoint)
         break;
   }

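   /*
    * insert before pos: either the first interval ending no earlier than
    * this one, or, if the loop ran through, the list head itself (in which
    * case this appends)
    */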
   list_addtail(&interval->list, &pos->list);
}

/**
 * Remove an interval from the active list.
 */
static void
linear_scan_remove_active(struct linear_scan *ls,
                          struct linear_scan_live_interval *interval)
{
   list_del(&interval->list);
}

/**
 * Remove intervals that are no longer active from the active list.
 */
static void
linear_scan_expire_active(struct linear_scan *ls, int pc)
{
   struct linear_scan_live_interval *interval, *next;

   LIST_FOR_EACH_ENTRY_SAFE(interval, next, &ls->active_list, list) {
      /*
       * since we sort intervals on the active list by their endpoints, we
       * know that this and the rest of the intervals are still active.
       */
      if (interval->endpoint >= pc)
         break;

      linear_scan_remove_active(ls, interval);

      /* recycle the reg */
      linear_scan_free_regs(ls, interval->reg, 1);
   }
}

/**
 * Spill an interval.
 */
static void
linear_scan_spill(struct linear_scan *ls,
                  struct linear_scan_live_interval *interval,
                  bool is_active)
{
   assert(!"no spilling support");
}

/**
 * Spill a range of intervals.
 */
static void
linear_scan_spill_range(struct linear_scan *ls, int first, int count)
{
   int i;

   for (i = 0; i < count; i++) {
      struct linear_scan_live_interval *interval = &ls->intervals[first + i];

      linear_scan_spill(ls, interval, false);
   }
}

/**
 * Perform linear scan to allocate registers for the intervals.
 */
static bool
linear_scan_run(struct linear_scan *ls)
{
   int i;

   i = 0;
   while (i < ls->num_vrfs) {
      struct linear_scan_live_interval *first = &ls->intervals[i];
      int reg, count;

      /*
       * GEN6_OPCODE_SEND may write multiple consecutive registers, so
       * collect all the intervals that must be placed right after this one
       */
      for (count = 1; i + count < ls->num_vrfs; count++) {
         const struct linear_scan_live_interval *interval =
            &ls->intervals[i + count];

         if (interval->startpoint != first->startpoint ||
             !interval->consecutive)
            break;
      }

      reg = linear_scan_allocate_regs(ls, count);

      /* expire intervals that are no longer active and try again */
      if (reg < 0) {
         linear_scan_expire_active(ls, first->startpoint);
         reg = linear_scan_allocate_regs(ls, count);
      }

      /* have to spill some intervals */
      if (reg < 0) {
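         /* the active list is sorted by endpoint, so its tail ends last */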
         struct linear_scan_live_interval *last_active =
            container_of(ls->active_list.prev,
                  (struct linear_scan_live_interval *) NULL, list);

         /* heuristically spill the interval that ends last */
         if (count > 1 || last_active->endpoint < first->endpoint) {
            linear_scan_spill_range(ls, i, count);
            i += count;
            continue;
         }

         /* make some room for the new interval */
         linear_scan_spill(ls, last_active, true);
         reg = linear_scan_allocate_regs(ls, count);
         if (reg < 0) {
            assert(!"failed to spill any register");
            return false;
         }
      }

      while (count--) {
         struct linear_scan_live_interval *interval = &ls->intervals[i++];

         interval->reg = reg++;
         linear_scan_add_active(ls, interval);

         ls->vrf_mapping[interval->vrf] = interval->reg;

         /*
          * this must hold because the intervals were created in VRF order
          * and the chunk was allocated as consecutive registers
          */
         assert(interval->vrf - first->vrf == interval->reg - first->reg);
      }
   }

   return true;
}

/**
 * Add a new interval.
 */
static void
linear_scan_add_live_interval(struct linear_scan *ls, int vrf, int pc)
{
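   /*
    * a non-zero ->vrf marks a slot that has already been added; this
    * assumes VRF numbering starts from 1 so that VRF 0 never appears
    */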
   if (ls->intervals[vrf].vrf)
      return;

   ls->intervals[vrf].vrf = vrf;
   ls->intervals[vrf].startpoint = pc;

   ls->num_vrfs++;
   if (vrf > ls->max_vrf)
      ls->max_vrf = vrf;
}

/**
 * Perform (oversimplified?) live variable analysis.
 */
static void
linear_scan_init_live_intervals(struct linear_scan *ls,
                                struct toy_compiler *tc)
{
   const struct toy_inst *inst;
   int pc, do_pc, while_pc;

   pc = 0;
   do_pc = -1;
   while_pc = -1;

   tc_head(tc);
   while ((inst = tc_next_no_skip(tc)) != NULL) {
      const int startpoint = (pc <= while_pc) ? do_pc : pc;
      const int endpoint = (pc <= while_pc) ? while_pc : pc;
      int vrf, i;

      /*
       * assume all registers used in the outermost loop are live throughout
       * the whole loop
       */
      if (inst->marker) {
         if (pc > while_pc) {
            struct toy_inst *inst2;
            int loop_level = 1;

            assert(inst->opcode == TOY_OPCODE_DO);
            do_pc = pc;
            while_pc = pc + 1;

            /* find the matching GEN6_OPCODE_WHILE */
            LIST_FOR_EACH_ENTRY_FROM(inst2, tc->iter_next,
                  &tc->instructions, list) {
               if (inst2->marker) {
                  assert(inst2->opcode == TOY_OPCODE_DO);
                  loop_level++;
                  continue;
               }

               if (inst2->opcode == GEN6_OPCODE_WHILE) {
                  loop_level--;
                  if (!loop_level)
                     break;
               }
               while_pc++;
            }
         }

         continue;
      }

      if (inst->dst.file == TOY_FILE_VRF) {
         int num_dst;

         /* TODO this is a hack */
         if (inst->opcode == GEN6_OPCODE_SEND ||
             inst->opcode == GEN6_OPCODE_SENDC) {
            const uint32_t mdesc = inst->src[1].val32;
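            /* the response length lives in bits 24:20 of the message descriptor */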
            int response_length = (mdesc >> 20) & 0x1f;

            num_dst = response_length;
            if (num_dst > 1 && inst->exec_size == GEN6_EXECSIZE_16)
               num_dst /= 2;
         }
         else {
            num_dst = 1;
         }

         vrf = inst->dst.val32 / TOY_REG_WIDTH;

         for (i = 0; i < num_dst; i++) {
            /* first use */
            if (!ls->intervals[vrf].vrf)
               linear_scan_add_live_interval(ls, vrf, startpoint);

            ls->intervals[vrf].endpoint = endpoint;
            ls->intervals[vrf].consecutive = (i > 0);

            vrf++;
         }
      }

      for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         vrf = inst->src[i].val32 / TOY_REG_WIDTH;

         /* first use */
         if (!ls->intervals[vrf].vrf)
            linear_scan_add_live_interval(ls, vrf, startpoint);

         ls->intervals[vrf].endpoint = endpoint;
      }

      pc++;
   }
}

/**
 * Clean up after performing linear scan.
 */
static void
linear_scan_cleanup(struct linear_scan *ls)
{
   FREE(ls->vrf_mapping);
   FREE(ls->intervals);
   FREE(ls->free_regs);
}

static int
linear_scan_compare_live_intervals(const void *elem1, const void *elem2)
{
   const struct linear_scan_live_interval *interval1 = elem1;
   const struct linear_scan_live_interval *interval2 = elem2;

   /* make unused elements appear at the end */
   if (!interval1->vrf)
      return 1;
   else if (!interval2->vrf)
      return -1;

   /* sort by startpoints first, and then by vrf */
   if (interval1->startpoint != interval2->startpoint)
      return (interval1->startpoint - interval2->startpoint);
   else
      return (interval1->vrf - interval2->vrf);
}

/**
 * Prepare for linear scan.
 */
static bool
linear_scan_init(struct linear_scan *ls, int num_regs,
                 struct toy_compiler *tc)
{
   int num_intervals, i;

   memset(ls, 0, sizeof(*ls));

   /* this may be much larger than ls->num_vrfs... */
   num_intervals = tc->next_vrf;
   ls->intervals = CALLOC(num_intervals, sizeof(ls->intervals[0]));
   if (!ls->intervals)
      return false;

   linear_scan_init_live_intervals(ls, tc);
   /* sort intervals by startpoints */
   qsort(ls->intervals, num_intervals, sizeof(*ls->intervals),
         linear_scan_compare_live_intervals);
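   /*
    * unused slots sort past the first ls->num_vrfs entries, which is all
    * linear_scan_run() ever looks at
    */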

   ls->num_regs = num_regs;
   ls->num_free_regs = num_regs;

   ls->free_regs = MALLOC(ls->num_regs * sizeof(*ls->free_regs));
   if (!ls->free_regs) {
      FREE(ls->intervals);
      return false;
   }

   /* add in reverse order as we will allocate from the tail */
   for (i = 0; i < ls->num_regs; i++)
      ls->free_regs[i] = num_regs - i - 1;

   list_inithead(&ls->active_list);

   ls->vrf_mapping = CALLOC(ls->max_vrf + 1, sizeof(*ls->vrf_mapping));
   if (!ls->vrf_mapping) {
      FREE(ls->intervals);
      FREE(ls->free_regs);
      return false;
   }

   return true;
}

/**
 * Allocate registers with linear scan.
 */
static void
linear_scan_allocation(struct toy_compiler *tc,
                       int start_grf, int end_grf,
                       int num_grf_per_vrf)
{
   const int num_grfs = end_grf - start_grf + 1;
   struct linear_scan ls;
   struct toy_inst *inst;

   if (!linear_scan_init(&ls, num_grfs / num_grf_per_vrf, tc))
      return;

   if (!linear_scan_run(&ls)) {
      tc_fail(tc, "failed to allocate registers");
      linear_scan_cleanup(&ls);
      return;
   }

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      if (inst->dst.file == TOY_FILE_VRF) {
         const uint32_t val32 = inst->dst.val32;
         int reg = val32 / TOY_REG_WIDTH;
         int subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF */
         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;

         inst->dst.file = TOY_FILE_GRF;
         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;
      }

      for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
         const uint32_t val32 = inst->src[i].val32;
         int reg, subreg;

         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         reg = val32 / TOY_REG_WIDTH;
         subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF */
         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;

         inst->src[i].file = TOY_FILE_GRF;
         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;
      }
   }

   linear_scan_cleanup(&ls);
}

/**
 * Trivially allocate registers.
 */
static void
trivial_allocation(struct toy_compiler *tc,
                   int start_grf, int end_grf,
                   int num_grf_per_vrf)
{
   struct toy_inst *inst;
   int max_grf = -1;

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      if (inst->dst.file == TOY_FILE_VRF) {
         const uint32_t val32 = inst->dst.val32;
         int reg = val32 / TOY_REG_WIDTH;
         int subreg = val32 % TOY_REG_WIDTH;

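         /* VRF numbers are assumed to start at 1, hence the minus one */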
         reg = reg * num_grf_per_vrf + start_grf - 1;

         inst->dst.file = TOY_FILE_GRF;
         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;

         if (reg > max_grf)
            max_grf = reg;
      }

      for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
         const uint32_t val32 = inst->src[i].val32;
         int reg, subreg;

         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         reg = val32 / TOY_REG_WIDTH;
         subreg = val32 % TOY_REG_WIDTH;

         reg = reg * num_grf_per_vrf + start_grf - 1;

         inst->src[i].file = TOY_FILE_GRF;
         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;

         if (reg > max_grf)
            max_grf = reg;
      }
   }

   if (max_grf + num_grf_per_vrf - 1 > end_grf)
      tc_fail(tc, "failed to allocate registers");
}

/**
 * Allocate GRF registers to VRF registers.
 */
void
toy_compiler_allocate_registers(struct toy_compiler *tc,
                                int start_grf, int end_grf,
                                int num_grf_per_vrf)
{
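   /*
    * the trivial allocator is presumably kept only as a debugging fallback;
    * flip the condition below to use it
    */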
   if (true)
      linear_scan_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
   else
      trivial_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
}
    629