Home | History | Annotate | Download | only in state_tracker
      1 /*
      2  * Copyright  2017 Gert Wollny
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #include "st_glsl_to_tgsi_temprename.h"
     25 #include "tgsi/tgsi_info.h"
     26 #include "tgsi/tgsi_strings.h"
     27 #include "program/prog_instruction.h"
     28 #include "util/bitscan.h"
     29 #include <limits>
     30 #include <cstdlib>
     31 
     32 /* std::sort is significantly faster than qsort */
     33 #define USE_STL_SORT
     34 #ifdef USE_STL_SORT
     35 #include <algorithm>
     36 #endif
     37 
     38 #ifndef NDEBUG
     39 #include <iostream>
     40 #include <iomanip>
     41 #include "program/prog_print.h"
     42 #include "util/debug.h"
     43 using std::cerr;
     44 using std::setw;
     45 #endif
     46 
     47 /* If <windows.h> is included this is defined and clashes with
     48  * std::numeric_limits<>::max()
     49  */
     50 #ifdef max
     51 #undef max
     52 #endif
     53 
     54 using std::numeric_limits;
     55 
     56 /* Without c++11 define the nullptr for forward-compatibility
     57  * and better readibility */
     58 #if __cplusplus < 201103L
     59 #define nullptr 0
     60 #endif
     61 
     62 #ifndef NDEBUG
     63 /* Helper function to check whether we want to seen debugging output */
     64 static inline bool is_debug_enabled ()
     65 {
     66    static int debug_enabled = -1;
     67    if (debug_enabled < 0)
     68       debug_enabled = env_var_as_boolean("GLSL_TO_TGSI_RENAME_DEBUG", false);
     69    return debug_enabled > 0;
     70 }
     71 #define RENAME_DEBUG(X) if (is_debug_enabled()) do { X; } while (false);
     72 #else
     73 #define RENAME_DEBUG(X)
     74 #endif
     75 
     76 namespace {
     77 
     78 enum prog_scope_type {
     79    outer_scope,           /* Outer program scope */
     80    loop_body,             /* Inside a loop */
     81    if_branch,             /* Inside if branch */
     82    else_branch,           /* Inside else branch */
     83    switch_body,           /* Inside switch statmenet */
     84    switch_case_branch,    /* Inside switch case statmenet */
     85    switch_default_branch, /* Inside switch default statmenet */
     86    undefined_scope
     87 };
     88 
     89 class prog_scope {
     90 public:
     91    prog_scope(prog_scope *parent, prog_scope_type type, int id,
     92               int depth, int begin);
     93 
     94    prog_scope_type type() const;
     95    prog_scope *parent() const;
     96    int nesting_depth() const;
     97    int id() const;
     98    int end() const;
     99    int begin() const;
    100    int loop_break_line() const;
    101 
    102    const prog_scope *in_ifelse_scope() const;
    103    const prog_scope *in_switchcase_scope() const;
    104    const prog_scope *innermost_loop() const;
    105    const prog_scope *outermost_loop() const;
    106    const prog_scope *enclosing_conditional() const;
    107 
    108    bool is_loop() const;
    109    bool is_in_loop() const;
    110    bool is_conditional() const;
    111 
    112    bool break_is_for_switchcase() const;
    113    bool contains_range_of(const prog_scope& other) const;
    114 
    115    void set_end(int end);
    116    void set_loop_break_line(int line);
    117 
    118 private:
    119    prog_scope_type scope_type;
    120    int scope_id;
    121    int scope_nesting_depth;
    122    int scope_begin;
    123    int scope_end;
    124    int break_loop_line;
    125    prog_scope *parent_scope;
    126 };
    127 
    128 /* Some storage class to encapsulate the prog_scope (de-)allocations */
    129 class prog_scope_storage {
    130 public:
    131    prog_scope_storage(void *mem_ctx, int n);
    132    ~prog_scope_storage();
    133    prog_scope * create(prog_scope *p, prog_scope_type type, int id,
    134                        int lvl, int s_begin);
    135 private:
    136    void *mem_ctx;
    137    int current_slot;
    138    prog_scope *storage;
    139 };
    140 
    141 class temp_comp_access {
    142 public:
    143    temp_comp_access();
    144    void record_read(int line, prog_scope *scope);
    145    void record_write(int line, prog_scope *scope);
    146    lifetime get_required_lifetime();
    147 private:
    148    void propagate_lifetime_to_dominant_write_scope();
    149 
    150    prog_scope *last_read_scope;
    151    prog_scope *first_read_scope;
    152    prog_scope *first_write_scope;
    153    int first_write;
    154    int last_read;
    155    int last_write;
    156    int first_read;
    157    bool keep_for_full_loop;
    158 };
    159 
    160 class temp_access {
    161 public:
    162    temp_access();
    163    void record_read(int line, prog_scope *scope, int swizzle);
    164    void record_write(int line, prog_scope *scope, int writemask);
    165    lifetime get_required_lifetime();
    166 private:
    167    void update_access_mask(int mask);
    168 
    169    temp_comp_access comp[4];
    170    int access_mask;
    171    bool needs_component_tracking;
    172 };
    173 
    174 prog_scope_storage::prog_scope_storage(void *mc, int n):
    175    mem_ctx(mc),
    176    current_slot(0)
    177 {
    178    storage = ralloc_array(mem_ctx, prog_scope, n);
    179 }
    180 
    181 prog_scope_storage::~prog_scope_storage()
    182 {
    183    ralloc_free(storage);
    184 }
    185 
    186 prog_scope*
    187 prog_scope_storage::create(prog_scope *p, prog_scope_type type, int id,
    188                            int lvl, int s_begin)
    189 {
    190    storage[current_slot] = prog_scope(p, type, id, lvl, s_begin);
    191    return &storage[current_slot++];
    192 }
    193 
    194 prog_scope::prog_scope(prog_scope *parent, prog_scope_type type, int id,
    195                        int depth, int scope_begin):
    196    scope_type(type),
    197    scope_id(id),
    198    scope_nesting_depth(depth),
    199    scope_begin(scope_begin),
    200    scope_end(-1),
    201    break_loop_line(numeric_limits<int>::max()),
    202    parent_scope(parent)
    203 {
    204 }
    205 
    206 prog_scope_type prog_scope::type() const
    207 {
    208    return scope_type;
    209 }
    210 
    211 prog_scope *prog_scope::parent() const
    212 {
    213    return parent_scope;
    214 }
    215 
    216 int prog_scope::nesting_depth() const
    217 {
    218    return scope_nesting_depth;
    219 }
    220 
    221 bool prog_scope::is_loop() const
    222 {
    223    return (scope_type == loop_body);
    224 }
    225 
    226 bool prog_scope::is_in_loop() const
    227 {
    228    if (scope_type == loop_body)
    229       return true;
    230 
    231    if (parent_scope)
    232       return parent_scope->is_in_loop();
    233 
    234    return false;
    235 }
    236 
    237 const prog_scope *prog_scope::innermost_loop() const
    238 {
    239    if (scope_type == loop_body)
    240       return this;
    241 
    242    if (parent_scope)
    243       return parent_scope->innermost_loop();
    244 
    245    return nullptr;
    246 }
    247 
    248 const prog_scope *prog_scope::outermost_loop() const
    249 {
    250    const prog_scope *loop = nullptr;
    251    const prog_scope *p = this;
    252 
    253    do {
    254       if (p->type() == loop_body)
    255          loop = p;
    256       p = p->parent();
    257    } while (p);
    258 
    259    return loop;
    260 }
    261 
    262 const prog_scope *prog_scope::enclosing_conditional() const
    263 {
    264    if (is_conditional())
    265       return this;
    266 
    267    if (parent_scope)
    268       return parent_scope->enclosing_conditional();
    269 
    270    return nullptr;
    271 }
    272 
    273 bool prog_scope::contains_range_of(const prog_scope& other) const
    274 {
    275    return (begin() <= other.begin()) && (end() >= other.end());
    276 }
    277 
    278 bool prog_scope::is_conditional() const
    279 {
    280    return scope_type == if_branch ||
    281          scope_type == else_branch ||
    282          scope_type == switch_case_branch ||
    283          scope_type == switch_default_branch;
    284 }
    285 
    286 const prog_scope *prog_scope::in_ifelse_scope() const
    287 {
    288    if (scope_type == if_branch ||
    289        scope_type == else_branch)
    290       return this;
    291 
    292    if (parent_scope)
    293       return parent_scope->in_ifelse_scope();
    294 
    295    return nullptr;
    296 }
    297 
    298 const prog_scope *prog_scope::in_switchcase_scope() const
    299 {
    300    if (scope_type == switch_case_branch ||
    301        scope_type == switch_default_branch)
    302       return this;
    303 
    304    if (parent_scope)
    305       return parent_scope->in_switchcase_scope();
    306 
    307    return nullptr;
    308 }
    309 
    310 bool prog_scope::break_is_for_switchcase() const
    311 {
    312    if (scope_type == loop_body)
    313       return false;
    314 
    315    if (scope_type == switch_case_branch ||
    316        scope_type == switch_default_branch ||
    317        scope_type == switch_body)
    318       return true;
    319 
    320    if (parent_scope)
    321       return parent_scope->break_is_for_switchcase();
    322 
    323    return false;
    324 }
    325 
    326 int prog_scope::id() const
    327 {
    328    return scope_id;
    329 }
    330 
    331 int prog_scope::begin() const
    332 {
    333    return scope_begin;
    334 }
    335 
    336 int prog_scope::end() const
    337 {
    338    return scope_end;
    339 }
    340 
    341 void prog_scope::set_end(int end)
    342 {
    343    if (scope_end == -1)
    344       scope_end = end;
    345 }
    346 
    347 void prog_scope::set_loop_break_line(int line)
    348 {
    349    if (scope_type == loop_body) {
    350       break_loop_line = MIN2(break_loop_line, line);
    351    } else {
    352       if (parent_scope)
    353          parent()->set_loop_break_line(line);
    354    }
    355 }
    356 
    357 int prog_scope::loop_break_line() const
    358 {
    359    return break_loop_line;
    360 }
    361 
    362 temp_access::temp_access():
    363    access_mask(0),
    364    needs_component_tracking(false)
    365 {
    366 }
    367 
    368 void temp_access::update_access_mask(int mask)
    369 {
    370    if (access_mask && access_mask != mask)
    371       needs_component_tracking = true;
    372    access_mask |= mask;
    373 }
    374 
    375 void temp_access::record_write(int line, prog_scope *scope, int writemask)
    376 {
    377    update_access_mask(writemask);
    378 
    379    if (writemask & WRITEMASK_X)
    380       comp[0].record_write(line, scope);
    381    if (writemask & WRITEMASK_Y)
    382       comp[1].record_write(line, scope);
    383    if (writemask & WRITEMASK_Z)
    384       comp[2].record_write(line, scope);
    385    if (writemask & WRITEMASK_W)
    386       comp[3].record_write(line, scope);
    387 }
    388 
    389 void temp_access::record_read(int line, prog_scope *scope, int swizzle)
    390 {
    391    int readmask = 0;
    392    for (int idx = 0; idx < 4; ++idx) {
    393       int swz = GET_SWZ(swizzle, idx);
    394       readmask |= (1 << swz) & 0xF;
    395    }
    396    update_access_mask(readmask);
    397 
    398    if (readmask & WRITEMASK_X)
    399       comp[0].record_read(line, scope);
    400    if (readmask & WRITEMASK_Y)
    401       comp[1].record_read(line, scope);
    402    if (readmask & WRITEMASK_Z)
    403       comp[2].record_read(line, scope);
    404    if (readmask & WRITEMASK_W)
    405       comp[3].record_read(line, scope);
    406 }
    407 
    408 inline static lifetime make_lifetime(int b, int e)
    409 {
    410    lifetime lt;
    411    lt.begin = b;
    412    lt.end = e;
    413    return lt;
    414 }
    415 
    416 lifetime temp_access::get_required_lifetime()
    417 {
    418    lifetime result = make_lifetime(-1, -1);
    419 
    420    unsigned mask = access_mask;
    421    while (mask) {
    422       unsigned chan = u_bit_scan(&mask);
    423       lifetime lt = comp[chan].get_required_lifetime();
    424 
    425       if (lt.begin >= 0) {
    426          if ((result.begin < 0) || (result.begin > lt.begin))
    427             result.begin = lt.begin;
    428       }
    429 
    430       if (lt.end > result.end)
    431          result.end = lt.end;
    432 
    433       if (!needs_component_tracking)
    434          break;
    435    }
    436    return result;
    437 }
    438 
    439 temp_comp_access::temp_comp_access():
    440    last_read_scope(nullptr),
    441    first_read_scope(nullptr),
    442    first_write_scope(nullptr),
    443    first_write(-1),
    444    last_read(-1),
    445    last_write(-1),
    446    first_read(numeric_limits<int>::max())
    447 {
    448 }
    449 
    450 void temp_comp_access::record_read(int line, prog_scope *scope)
    451 {
    452    last_read_scope = scope;
    453    last_read = line;
    454 
    455    if (first_read > line) {
    456       first_read = line;
    457       first_read_scope = scope;
    458    }
    459 }
    460 
    461 void temp_comp_access::record_write(int line, prog_scope *scope)
    462 {
    463    last_write = line;
    464 
    465    if (first_write < 0) {
    466       first_write = line;
    467       first_write_scope = scope;
    468    }
    469 }
    470 
    471 void temp_comp_access::propagate_lifetime_to_dominant_write_scope()
    472 {
    473    first_write = first_write_scope->begin();
    474    int lr = first_write_scope->end();
    475 
    476    if (last_read < lr)
    477       last_read = lr;
    478 }
    479 
    480 lifetime temp_comp_access::get_required_lifetime()
    481 {
    482    bool keep_for_full_loop = false;
    483 
    484    /* This register component is not used at all, or only read,
    485     * mark it as unused and ignore it when renaming.
    486     * glsl_to_tgsi_visitor::renumber_registers will take care of
    487     * eliminating registers that are not written to.
    488     */
    489    if (last_write < 0)
    490       return make_lifetime(-1, -1);
    491 
    492    assert(first_write_scope);
    493 
    494    /* Only written to, just make sure the register component is not
    495     * reused in the range it is used to write to
    496     */
    497    if (!last_read_scope)
    498       return make_lifetime(first_write, last_write + 1);
    499 
    500    const prog_scope *enclosing_scope_first_read = first_read_scope;
    501    const prog_scope *enclosing_scope_first_write = first_write_scope;
    502 
    503    /* We read before writing in a loop
    504     * hence the value must survive the loops
    505     */
    506    if ((first_read <= first_write) &&
    507        first_read_scope->is_in_loop()) {
    508       keep_for_full_loop = true;
    509       enclosing_scope_first_read = first_read_scope->outermost_loop();
    510    }
    511 
    512    /* A conditional write within a nested loop must survive
    513     * the outermost loop, but only if it is read outside
    514     * the condition scope where we write.
    515     */
    516    const prog_scope *conditional = enclosing_scope_first_write->enclosing_conditional();
    517    if (conditional && conditional->is_in_loop() &&
    518        !conditional->contains_range_of(*last_read_scope)) {
    519       keep_for_full_loop = true;
    520       enclosing_scope_first_write = conditional->outermost_loop();
    521    }
    522 
    523    /* Evaluate the scope that is shared by all: required first write scope,
    524     * required first read before write scope, and last read scope.
    525     */
    526    const prog_scope *enclosing_scope = enclosing_scope_first_read;
    527    if (enclosing_scope_first_write->contains_range_of(*enclosing_scope))
    528       enclosing_scope = enclosing_scope_first_write;
    529 
    530    if (last_read_scope->contains_range_of(*enclosing_scope))
    531       enclosing_scope = last_read_scope;
    532 
    533    while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) ||
    534           !enclosing_scope->contains_range_of(*last_read_scope)) {
    535       enclosing_scope = enclosing_scope->parent();
    536       assert(enclosing_scope);
    537    }
    538 
    539    /* Propagate the last read scope to the target scope */
    540    while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) {
    541       /* If the read is in a loop and we have to move up the scope we need to
    542        * extend the life time to the end of this current loop because at this
    543        * point we don't know whether the component was written before
    544        * un-conditionally in the same loop.
    545        */
    546       if (last_read_scope->is_loop())
    547          last_read = last_read_scope->end();
    548 
    549       last_read_scope = last_read_scope->parent();
    550    }
    551 
    552    /* If the variable has to be kept for the whole loop, and we
    553     * are currently in a loop, then propagate the life time.
    554     */
    555    if (keep_for_full_loop && first_write_scope->is_loop())
    556       propagate_lifetime_to_dominant_write_scope();
    557 
    558    /* Propagate the first_dominant_write scope to the target scope */
    559    while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) {
    560       /* Propagate lifetime if there was a break in a loop and the write was
    561        * after the break inside that loop. Note, that this is only needed if
    562        * we move up in the scopes.
    563        */
    564       if (first_write_scope->loop_break_line() < first_write) {
    565          keep_for_full_loop = true;
    566          propagate_lifetime_to_dominant_write_scope();
    567       }
    568 
    569       first_write_scope = first_write_scope->parent();
    570 
    571       /* Propagte lifetime if we are now in a loop */
    572       if (keep_for_full_loop && first_write_scope->is_loop())
    573           propagate_lifetime_to_dominant_write_scope();
    574    }
    575 
    576    /* The last write past the last read is dead code, but we have to
    577     * ensure that the component is not reused too early, hence extend the
    578     * lifetime past the last write.
    579     */
    580    if (last_write >= last_read)
    581       last_read = last_write + 1;
    582 
    583    /* Here we are at the same scope, all is resolved */
    584    return make_lifetime(first_write, last_read);
    585 }
    586 
    587 /* Helper class for sorting and searching the registers based
    588  * on life times. */
    589 class access_record {
    590 public:
    591    int begin;
    592    int end;
    593    int reg;
    594    bool erase;
    595 
    596    bool operator < (const access_record& rhs) const {
    597       return begin < rhs.begin;
    598    }
    599 };
    600 
    601 }
    602 
    603 #ifndef NDEBUG
    604 /* Function used for debugging. */
    605 static void dump_instruction(int line, prog_scope *scope,
    606                              const glsl_to_tgsi_instruction& inst);
    607 #endif
    608 
    609 /* Scan the program and estimate the required register life times.
    610  * The array lifetimes must be pre-allocated
    611  */
    612 bool
    613 get_temp_registers_required_lifetimes(void *mem_ctx, exec_list *instructions,
    614                                       int ntemps, struct lifetime *lifetimes)
    615 {
    616    int line = 0;
    617    int loop_id = 0;
    618    int if_id = 0;
    619    int switch_id = 0;
    620    bool is_at_end = false;
    621    bool ok = true;
    622    int n_scopes = 1;
    623 
    624    /* Count scopes to allocate the needed space without the need for
    625     * re-allocation
    626     */
    627    foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) {
    628       if (inst->op == TGSI_OPCODE_BGNLOOP ||
    629           inst->op == TGSI_OPCODE_SWITCH ||
    630           inst->op == TGSI_OPCODE_CASE ||
    631           inst->op == TGSI_OPCODE_IF ||
    632           inst->op == TGSI_OPCODE_UIF ||
    633           inst->op == TGSI_OPCODE_ELSE ||
    634           inst->op == TGSI_OPCODE_DEFAULT)
    635          ++n_scopes;
    636    }
    637 
    638    prog_scope_storage scopes(mem_ctx, n_scopes);
    639    temp_access *acc = new temp_access[ntemps];
    640 
    641    prog_scope *cur_scope = scopes.create(nullptr, outer_scope, 0, 0, line);
    642 
    643    RENAME_DEBUG(cerr << "========= Begin shader ============\n");
    644 
    645    foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) {
    646       if (is_at_end) {
    647          assert(!"GLSL_TO_TGSI: shader has instructions past end marker");
    648          break;
    649       }
    650 
    651       RENAME_DEBUG(dump_instruction(line, cur_scope, *inst));
    652 
    653       switch (inst->op) {
    654       case TGSI_OPCODE_BGNLOOP: {
    655          cur_scope = scopes.create(cur_scope, loop_body, loop_id++,
    656                                    cur_scope->nesting_depth() + 1, line);
    657          break;
    658       }
    659       case TGSI_OPCODE_ENDLOOP: {
    660          cur_scope->set_end(line);
    661          cur_scope = cur_scope->parent();
    662          assert(cur_scope);
    663          break;
    664       }
    665       case TGSI_OPCODE_IF:
    666       case TGSI_OPCODE_UIF: {
    667          assert(num_inst_src_regs(inst) == 1);
    668          const st_src_reg& src = inst->src[0];
    669          if (src.file == PROGRAM_TEMPORARY)
    670             acc[src.index].record_read(line, cur_scope, src.swizzle);
    671          cur_scope = scopes.create(cur_scope, if_branch, if_id++,
    672                                    cur_scope->nesting_depth() + 1, line + 1);
    673          break;
    674       }
    675       case TGSI_OPCODE_ELSE: {
    676          assert(cur_scope->type() == if_branch);
    677          cur_scope->set_end(line - 1);
    678          cur_scope = scopes.create(cur_scope->parent(), else_branch,
    679                                    cur_scope->id(), cur_scope->nesting_depth(),
    680                                    line + 1);
    681          break;
    682       }
    683       case TGSI_OPCODE_END: {
    684          cur_scope->set_end(line);
    685          is_at_end = true;
    686          break;
    687       }
    688       case TGSI_OPCODE_ENDIF: {
    689          cur_scope->set_end(line - 1);
    690          cur_scope = cur_scope->parent();
    691          assert(cur_scope);
    692          break;
    693       }
    694       case TGSI_OPCODE_SWITCH: {
    695          assert(num_inst_src_regs(inst) == 1);
    696          const st_src_reg& src = inst->src[0];
    697          prog_scope *scope = scopes.create(cur_scope, switch_body, switch_id++,
    698                                            cur_scope->nesting_depth() + 1, line);
    699          /* We record the read only for the SWITCH statement itself, like it
    700           * is used by the only consumer of TGSI_OPCODE_SWITCH in tgsi_exec.c.
    701           */
    702          if (src.file == PROGRAM_TEMPORARY)
    703             acc[src.index].record_read(line, cur_scope, src.swizzle);
    704          cur_scope = scope;
    705          break;
    706       }
    707       case TGSI_OPCODE_ENDSWITCH: {
    708          cur_scope->set_end(line - 1);
    709          /* Remove the case level, it might not have been
    710           * closed with a break.
    711           */
    712          if (cur_scope->type() != switch_body)
    713             cur_scope = cur_scope->parent();
    714 
    715          cur_scope = cur_scope->parent();
    716          assert(cur_scope);
    717          break;
    718       }
    719       case TGSI_OPCODE_CASE: {
    720          /* Take care of tracking the registers. */
    721          prog_scope *switch_scope = cur_scope->type() == switch_body ?
    722                                        cur_scope : cur_scope->parent();
    723 
    724          assert(num_inst_src_regs(inst) == 1);
    725          const st_src_reg& src = inst->src[0];
    726          if (src.file == PROGRAM_TEMPORARY)
    727             acc[src.index].record_read(line, switch_scope, src.swizzle);
    728 
    729          /* Fall through to allocate the scope. */
    730       }
    731       case TGSI_OPCODE_DEFAULT: {
    732          prog_scope_type t = inst->op == TGSI_OPCODE_CASE ? switch_case_branch
    733                                                        : switch_default_branch;
    734          prog_scope *switch_scope = (cur_scope->type() == switch_body) ?
    735             cur_scope : cur_scope->parent();
    736          assert(switch_scope->type() == switch_body);
    737          prog_scope *scope = scopes.create(switch_scope, t,
    738                                            switch_scope->id(),
    739                                            switch_scope->nesting_depth() + 1,
    740                                            line);
    741          /* Previous case falls through, so scope was not yet closed. */
    742          if ((cur_scope != switch_scope) && (cur_scope->end() == -1))
    743             cur_scope->set_end(line - 1);
    744          cur_scope = scope;
    745          break;
    746       }
    747       case TGSI_OPCODE_BRK: {
    748          if (cur_scope->break_is_for_switchcase()) {
    749             cur_scope->set_end(line - 1);
    750          } else {
    751             cur_scope->set_loop_break_line(line);
    752          }
    753          break;
    754       }
    755       case TGSI_OPCODE_CAL:
    756       case TGSI_OPCODE_RET:
    757          /* These opcodes are not supported and if a subroutine would
    758           * be called in a shader, then the lifetime tracking would have
    759           * to follow that call to see which registers are used there.
    760           * Since this is not done, we have to bail out here and signal
    761           * that no register merge will take place.
    762           */
    763          ok = false;
    764          goto out;
    765       default: {
    766          for (unsigned j = 0; j < num_inst_src_regs(inst); j++) {
    767             const st_src_reg& src = inst->src[j];
    768             if (src.file == PROGRAM_TEMPORARY)
    769                acc[src.index].record_read(line, cur_scope, src.swizzle);
    770          }
    771          for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) {
    772             const st_src_reg& src = inst->tex_offsets[j];
    773             if (src.file == PROGRAM_TEMPORARY)
    774                acc[src.index].record_read(line, cur_scope, src.swizzle);
    775          }
    776          for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) {
    777             const st_dst_reg& dst = inst->dst[j];
    778             if (dst.file == PROGRAM_TEMPORARY)
    779                acc[dst.index].record_write(line, cur_scope, dst.writemask);
    780          }
    781       }
    782       }
    783       ++line;
    784    }
    785 
    786    RENAME_DEBUG(cerr << "==================================\n\n");
    787 
    788    /* Make sure last scope is closed, even though no
    789     * TGSI_OPCODE_END was given.
    790     */
    791    if (cur_scope->end() < 0)
    792       cur_scope->set_end(line - 1);
    793 
    794    RENAME_DEBUG(cerr << "========= lifetimes ==============\n");
    795    for(int i = 0; i < ntemps; ++i) {
    796       RENAME_DEBUG(cerr << setw(4) << i);
    797       lifetimes[i] = acc[i].get_required_lifetime();
    798       RENAME_DEBUG(cerr << ": [" << lifetimes[i].begin << ", "
    799                         << lifetimes[i].end << "]\n");
    800    }
    801    RENAME_DEBUG(cerr << "==================================\n\n");
    802 
    803 out:
    804    delete[] acc;
    805    return ok;
    806 }
    807 
    808 /* Find the next register between [start, end) that has a life time starting
    809  * at or after bound by using a binary search.
    810  * start points at the beginning of the search range,
    811  * end points at the element past the end of the search range, and
    812  * the array comprising [start, end) must be sorted in ascending order.
    813  */
    814 static access_record*
    815 find_next_rename(access_record* start, access_record* end, int bound)
    816 {
    817    int delta = (end - start);
    818 
    819    while (delta > 0) {
    820       int half = delta >> 1;
    821       access_record* middle = start + half;
    822 
    823       if (bound <= middle->begin) {
    824          delta = half;
    825       } else {
    826          start = middle;
    827          ++start;
    828          delta -= half + 1;
    829       }
    830    }
    831 
    832    return start;
    833 }
    834 
    835 #ifndef USE_STL_SORT
    836 static int access_record_compare (const void *a, const void *b) {
    837    const access_record *aa = static_cast<const access_record*>(a);
    838    const access_record *bb = static_cast<const access_record*>(b);
    839    return aa->begin < bb->begin ? -1 : (aa->begin > bb->begin ? 1 : 0);
    840 }
    841 #endif
    842 
    843 /* This functions evaluates the register merges by using a binary
    844  * search to find suitable merge candidates. */
    845 void get_temp_registers_remapping(void *mem_ctx, int ntemps,
    846                                   const struct lifetime* lifetimes,
    847                                   struct rename_reg_pair *result)
    848 {
    849    access_record *reg_access = ralloc_array(mem_ctx, access_record, ntemps);
    850 
    851    int used_temps = 0;
    852    for (int i = 0; i < ntemps; ++i) {
    853       if (lifetimes[i].begin >= 0) {
    854          reg_access[used_temps].begin = lifetimes[i].begin;
    855          reg_access[used_temps].end = lifetimes[i].end;
    856          reg_access[used_temps].reg = i;
    857          reg_access[used_temps].erase = false;
    858          ++used_temps;
    859       }
    860    }
    861 
    862 #ifdef USE_STL_SORT
    863    std::sort(reg_access, reg_access + used_temps);
    864 #else
    865    std::qsort(reg_access, used_temps, sizeof(access_record), access_record_compare);
    866 #endif
    867 
    868    access_record *trgt = reg_access;
    869    access_record *reg_access_end = reg_access + used_temps;
    870    access_record *first_erase = reg_access_end;
    871    access_record *search_start = trgt + 1;
    872 
    873    while (trgt != reg_access_end) {
    874       access_record *src = find_next_rename(search_start, reg_access_end,
    875                                             trgt->end);
    876       if (src != reg_access_end) {
    877          result[src->reg].new_reg = trgt->reg;
    878          result[src->reg].valid = true;
    879          trgt->end = src->end;
    880 
    881          /* Since we only search forward, don't remove the renamed
    882           * register just now, only mark it. */
    883          src->erase = true;
    884 
    885          if (first_erase == reg_access_end)
    886             first_erase = src;
    887 
    888          search_start = src + 1;
    889       } else {
    890          /* Moving to the next target register it is time to remove
    891           * the already merged registers from the search range */
    892          if (first_erase != reg_access_end) {
    893             access_record *outp = first_erase;
    894             access_record *inp = first_erase + 1;
    895 
    896             while (inp != reg_access_end) {
    897                if (!inp->erase)
    898                   *outp++ = *inp;
    899                ++inp;
    900             }
    901 
    902             reg_access_end = outp;
    903             first_erase = reg_access_end;
    904          }
    905          ++trgt;
    906          search_start = trgt + 1;
    907       }
    908    }
    909    ralloc_free(reg_access);
    910 }
    911 
    912 /* Code below used for debugging */
    913 #ifndef NDEBUG
    914 static const char swizzle_txt[] = "xyzw";
    915 
    916 static const char *tgsi_file_names[PROGRAM_FILE_MAX] =  {
    917    "TEMP",  "ARRAY",   "IN", "OUT", "STATE", "CONST",
    918    "UNIFORM",  "WO", "ADDR", "SAMPLER",  "SV", "UNDEF",
    919    "IMM", "BUF",  "MEM",  "IMAGE"
    920 };
    921 
    922 static
    923 void dump_instruction(int line, prog_scope *scope,
    924                       const glsl_to_tgsi_instruction& inst)
    925 {
    926    const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst.op);
    927 
    928    int indent = scope->nesting_depth();
    929    if ((scope->type() == switch_case_branch ||
    930         scope->type() == switch_default_branch) &&
    931        (info->opcode == TGSI_OPCODE_CASE ||
    932         info->opcode == TGSI_OPCODE_DEFAULT))
    933       --indent;
    934 
    935    if (info->opcode == TGSI_OPCODE_ENDIF ||
    936        info->opcode == TGSI_OPCODE_ELSE ||
    937        info->opcode == TGSI_OPCODE_ENDLOOP ||
    938        info->opcode == TGSI_OPCODE_ENDSWITCH)
    939       --indent;
    940 
    941    cerr << setw(4) << line << ": ";
    942    for (int i = 0; i < indent; ++i)
    943       cerr << "    ";
    944    cerr << tgsi_get_opcode_name(info->opcode) << " ";
    945 
    946    bool has_operators = false;
    947    for (unsigned j = 0; j < num_inst_dst_regs(&inst); j++) {
    948       has_operators = true;
    949       if (j > 0)
    950          cerr << ", ";
    951 
    952       const st_dst_reg& dst = inst.dst[j];
    953       cerr << tgsi_file_names[dst.file];
    954 
    955       if (dst.file == PROGRAM_ARRAY)
    956          cerr << "(" << dst.array_id << ")";
    957 
    958       cerr << "[" << dst.index << "]";
    959 
    960       if (dst.writemask != TGSI_WRITEMASK_XYZW) {
    961          cerr << ".";
    962          if (dst.writemask & TGSI_WRITEMASK_X) cerr << "x";
    963          if (dst.writemask & TGSI_WRITEMASK_Y) cerr << "y";
    964          if (dst.writemask & TGSI_WRITEMASK_Z) cerr << "z";
    965          if (dst.writemask & TGSI_WRITEMASK_W) cerr << "w";
    966       }
    967    }
    968    if (has_operators)
    969       cerr << " := ";
    970 
    971    for (unsigned j = 0; j < num_inst_src_regs(&inst); j++) {
    972       if (j > 0)
    973          cerr << ", ";
    974 
    975       const st_src_reg& src = inst.src[j];
    976       cerr << tgsi_file_names[src.file]
    977            << "[" << src.index << "]";
    978       if (src.swizzle != SWIZZLE_XYZW) {
    979          cerr << ".";
    980          for (int idx = 0; idx < 4; ++idx) {
    981             int swz = GET_SWZ(src.swizzle, idx);
    982             if (swz < 4) {
    983                cerr << swizzle_txt[swz];
    984             }
    985          }
    986       }
    987    }
    988 
    989    if (inst.tex_offset_num_offset > 0) {
    990       cerr << ", TEXOFS: ";
    991       for (unsigned j = 0; j < inst.tex_offset_num_offset; j++) {
    992          if (j > 0)
    993             cerr << ", ";
    994 
    995          const st_src_reg& src = inst.tex_offsets[j];
    996          cerr << tgsi_file_names[src.file]
    997                << "[" << src.index << "]";
    998          if (src.swizzle != SWIZZLE_XYZW) {
    999             cerr << ".";
   1000             for (int idx = 0; idx < 4; ++idx) {
   1001                int swz = GET_SWZ(src.swizzle, idx);
   1002                if (swz < 4) {
   1003                   cerr << swizzle_txt[swz];
   1004                }
   1005             }
   1006          }
   1007       }
   1008    }
   1009    cerr << "\n";
   1010 }
   1011 #endif
   1012