Home | History | Annotate | Download | only in codegen
      1 /*
      2  * Copyright 2011 Christoph Bumiller
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
     18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
     19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     20  * SOFTWARE.
     21  */
     22 
     23 #include "nv50/codegen/nv50_ir.h"
     24 #include "nv50/codegen/nv50_ir_target.h"
     25 
     26 namespace nv50_ir {
     27 
     28 const uint8_t Target::operationSrcNr[OP_LAST + 1] =
     29 {
           // Per-operation source count table (going by the array name), with
           // one entry per opcode in the order given by the trailing comments
           // and a final entry for the OP_LAST slot; the array is declared
           // with OP_LAST + 1 elements.
           // NOTE(review): the opcode names below are taken from the comments;
           // the operation enum is declared outside this file, so keep both in
           // sync when adding or reordering opcodes.
     30    0, 0,                   // NOP, PHI
     31    0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
     32    1, 1, 2,                // MOV, LOAD, STORE
     33    2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
     34    1, 1, 1,                // ABS, NEG, NOT
     35    2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
     36    2, 2, 1,                // MAX, MIN, SAT
     37    1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
     38    3, 3, 3, 2, 3, 3,       // SET_AND, SET_OR, SET_XOR, SET, SELP, SLCT
     39    1, 1, 1, 1, 1, 1,       // RCP, RSQ, LG2, SIN, COS, EX2
     40    1, 1, 1, 1, 1, 2,       // EXP, LOG, PRESIN, PREEX2, SQRT, POW
     41    0, 0, 0, 0, 0,          // BRA, CALL, RET, CONT, BREAK
     42    0, 0, 0,                // PRERET, PRECONT, PREBREAK
     43    0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
     44    1, 1, 2, 1, 2,          // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
     45    1, 1,                   // EMIT, RESTART
     46    1, 1, 1,                // TEX, TXB, TXL
     47    1, 1, 1, 1, 1,          // TXF, TXQ, TXD, TXG, TEXCSAA
     48    1, 2,                   // SULD, SUST
     49    1, 1,                   // DFDX, DFDY
     50    1, 2, 2, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
     51    2, 3, 2, 0,             // POPCNT, INSBF, EXTBF, TEXBAR
     52    0                       // LAST
     53 };
     54 
     55 const OpClass Target::operationClass[OP_LAST + 1] =
     56 {
           // Maps every operation to an OpClass value. Entries appear in the
           // opcode order spelled out by the group comments below, followed
           // by one entry for the OP_LAST slot (OP_LAST + 1 elements total).
           // NOTE(review): the opcode order is taken from the comments; the
           // operation enum is declared outside this file.
     57    // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
     58    OPCLASS_OTHER,
     59    OPCLASS_PSEUDO,
     60    OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
     61    // MOV; LOAD; STORE
     62    OPCLASS_MOVE,
     63    OPCLASS_LOAD,
     64    OPCLASS_STORE,
     65    // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD
     66    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
     67    OPCLASS_ARITH, OPCLASS_ARITH,
     68    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
     69    // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
     70    OPCLASS_CONVERT, OPCLASS_CONVERT,
     71    OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
     72    OPCLASS_SHIFT, OPCLASS_SHIFT,
     73    // MAX, MIN
     74    OPCLASS_COMPARE, OPCLASS_COMPARE,
     75    // SAT, CEIL, FLOOR, TRUNC; CVT
     76    OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
     77    OPCLASS_CONVERT,
     78    // SET_AND, SET_OR, SET_XOR, SET; SELP, SLCT
     79    OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
     80    OPCLASS_COMPARE, OPCLASS_COMPARE,
     81    // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
     82    OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
     83    OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
     84    OPCLASS_SFU, OPCLASS_SFU,
     85    // BRA, CALL, RET; CONT, BREAK, PRERET, PRECONT, PREBREAK;
           // BRKPT, JOINAT, JOIN
     86    OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
     87    OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
     88    OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
     89    // DISCARD, EXIT
     90    OPCLASS_FLOW, OPCLASS_FLOW,
     91    // MEMBAR
     92    OPCLASS_OTHER,
     93    // VFETCH, PFETCH, EXPORT
     94    OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE,
     95    // LINTERP, PINTERP
     96    OPCLASS_SFU, OPCLASS_SFU,
     97    // EMIT, RESTART
     98    OPCLASS_OTHER, OPCLASS_OTHER,
     99    // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TEXCSAA
    100    OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
    101    OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
    102    // SULD, SUST
    103    OPCLASS_SURFACE, OPCLASS_SURFACE,
    104    // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
    105    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
    106    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
    107    // POPCNT, INSBF, EXTBF
    108    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
    109    // TEXBAR
    110    OPCLASS_OTHER,
    111    OPCLASS_PSEUDO // LAST
    112 };
    113 
    114 
    115 extern Target *getTargetNVC0(unsigned int chipset);
    116 extern Target *getTargetNV50(unsigned int chipset);
    117 
    118 Target *Target::create(unsigned int chipset)
    119 {
    120    switch (chipset & 0xf0) {
    121    case 0xc0:
    122    case 0xd0:
    123    case 0xe0:
    124       return getTargetNVC0(chipset);
    125    case 0x50:
    126    case 0x80:
    127    case 0x90:
    128    case 0xa0:
    129       return getTargetNV50(chipset);
    130    default:
    131       ERROR("unsupported target: NV%x\n", chipset);
    132       return 0;
    133    }
    134 }
    135 
    136 void Target::destroy(Target *targ)
    137 {
    138    delete targ;
    139 }
    140 
    141 CodeEmitter::CodeEmitter(const Target *target) : targ(target)
    142 {
    143 }
    144 
    145 void
    146 CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
    147 {
    148    code = reinterpret_cast<uint32_t *>(ptr);
    149    codeSize = 0;
    150    codeSizeLimit = size;
    151 }
    152 
    153 void
    154 CodeEmitter::printBinary() const
    155 {
    156    uint32_t *bin = code - codeSize / 4;
    157    INFO("program binary (%u bytes)", codeSize);
    158    for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
    159       if ((pos % 8) == 0)
    160          INFO("\n");
    161       INFO("%08x ", bin[pos]);
    162    }
    163    INFO("\n");
    164 }
    165 
    166 static inline uint32_t sizeToBundlesNVE4(uint32_t size)
    167 {
    168    return (size + 55) / 56;
    169 }
    170 
    171 void
    172 CodeEmitter::prepareEmission(Program *prog)
    173 {
    174    for (ArrayList::Iterator fi = prog->allFuncs.iterator();
    175         !fi.end(); fi.next()) {
    176       Function *func = reinterpret_cast<Function *>(fi.get());
    177       func->binPos = prog->binSize;
    178       prepareEmission(func);
    179 
    180       // adjust sizes & positions for schedulding info:
    181       if (prog->getTarget()->hasSWSched) {
    182          BasicBlock *bb = NULL;
    183          for (int i = 0; i < func->bbCount; ++i) {
    184             bb = func->bbArray[i];
    185             const uint32_t oldPos = bb->binPos;
    186             const uint32_t oldEnd = bb->binPos + bb->binSize;
    187             uint32_t adjPos = oldPos + sizeToBundlesNVE4(oldPos) * 8;
    188             uint32_t adjEnd = oldEnd + sizeToBundlesNVE4(oldEnd) * 8;
    189             bb->binPos = adjPos;
    190             bb->binSize = adjEnd - adjPos;
    191          }
    192          if (bb)
    193             func->binSize = bb->binPos + bb->binSize;
    194       }
    195 
    196       prog->binSize += func->binSize;
    197    }
    198 }
    199 
    200 void
    201 CodeEmitter::prepareEmission(Function *func)
    202 {
    203    func->bbCount = 0;
    204    func->bbArray = new BasicBlock * [func->cfg.getSize()];
    205 
    206    BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
    207 
    208    for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
    209       prepareEmission(BasicBlock::get(*it));
    210 }
    211 
    212 void
    213 CodeEmitter::prepareEmission(BasicBlock *bb)
    214 {
           // Computes the layout of one basic block: its start offset
           // (bb->binPos), the encoding size of each instruction (encSize) and
           // the block's total size (bb->binSize). The block is appended to
           // func->bbArray and its size is added to func->binSize.
           //
           // Before that, an OP_BRA that ends an already placed block and
           // targets `bb` is removed when only empty blocks lie in between,
           // and the sizes/positions recorded so far are reduced by its 8 bytes.
    215    Instruction *i, *next;
    216    Function *func = bb->getFunction();
    217    int j;
    218    unsigned int nShort;
    219 
           // Skip backwards over trailing blocks that have no code; j ends up
           // at the last placed block with a non-zero size, or at -1.
    220    for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
    221 
    222    for (; j >= 0; --j) {
    223       BasicBlock *in = func->bbArray[j];
    224       Instruction *exit = in->getExit();
    225 
    226       if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
                 // `in` ends in a branch to `bb`: drop the branch and its
                 // 8 bytes from the block and function sizes.
    227          in->binSize -= 8;
    228          func->binSize -= 8;
    229 
                 // Shift every block placed after `in` down by 8 bytes.
                 // Note that this reuses (and advances) the outer index j,
                 // which is func->bbCount when the inner loop finishes.
    230          for (++j; j < func->bbCount; ++j)
    231             func->bbArray[j]->binPos -= 8;
    232 
    233          in->remove(exit);
    234       }
              // bb starts right where `in` ends.
    235       bb->binPos = in->binPos + in->binSize;
    236       if (in->binSize) // no more no-op branches to bb
    237          break;
    238    }
    239    func->bbArray[func->bbCount++] = bb;
    240 
           // A block without an exit instruction is given no size here.
    241    if (!bb->getExit())
    242       return;
    243 
    244    // determine encoding size, try to group short instructions
           // nShort counts the instructions of the current run that were left
           // at an encoding smaller than 8 bytes; the code below treats an odd
           // count as "the preceding short instruction still lacks a partner".
    245    nShort = 0;
    246    for (i = bb->getEntry(); i; i = next) {
    247       next = i->next;
    248 
    249       i->encSize = getMinEncodingSize(i);
    250       if (next && i->encSize < 8)
    251          ++nShort;
    252       else
              // i is not short, the run before it is odd, and the following
              // instruction could be encoded in 4 bytes: try to bring the two
              // short instructions together by swapping i with a neighbour.
    253       if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
    254          if (i->isCommutationLegal(i->next)) {
                    // Move the short `next` in front of i; it becomes the
                    // current instruction and i is revisited next iteration.
    255             bb->permuteAdjacent(i, next);
    256             next->encSize = 4;
    257             next = i;
    258             i = i->prev;
    259             ++nShort;
    260          } else
    261          if (i->isCommutationLegal(i->prev) && next->next) {
                    // Move i in front of its short predecessor so that the
                    // predecessor and `next` become adjacent; `next` is
                    // accounted for here (4 bytes) and skipped by the loop.
    262             bb->permuteAdjacent(i->prev, i);
    263             next->encSize = 4;
    264             next = next->next;
    265             bb->binSize += 4;
    266             ++nShort;
    267          } else {
                    // No legal swap: widen both i and the unpaired short
                    // predecessor to 8 bytes (the predecessor grows by 4).
    268             i->encSize = 8;
    269             i->prev->encSize = 8;
    270             bb->binSize += 4;
    271             nShort = 0;
    272          }
    273       } else {
                 // Long instruction, or last instruction of the block: use the
                 // 8-byte form, widening an unpaired short predecessor too.
    274          i->encSize = 8;
    275          if (nShort & 1) {
    276             i->prev->encSize = 8;
    277             bb->binSize += 4;
    278          }
    279          nShort = 0;
    280       }
    281       bb->binSize += i->encSize;
    282    }
    283 
           // The exit instruction must use the 8-byte encoding (see the assert
           // below); widen it if the loop above left it at 4 bytes.
    284    if (bb->getExit()->encSize == 4) {
    285       assert(nShort);
    286       bb->getExit()->encSize = 8;
    287       bb->binSize += 4;
    288 
              // NOTE(review): the predecessor goes from 4 to 8 bytes here but
              // binSize grows by 8 - confirm that this is intended.
    289       if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
    290          bb->binSize += 8;
    291          bb->getExit()->prev->encSize = 8;
    292       }
    293    }
    294    assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));
    295 
    296    func->binSize += bb->binSize;
    297 }
    298 
    299 void
    300 Program::emitSymbolTable(struct nv50_ir_prog_info *info)
    301 {
    302    unsigned int n = 0, nMax = allFuncs.getSize();
    303 
    304    info->bin.syms =
    305       (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
    306 
    307    for (ArrayList::Iterator fi = allFuncs.iterator();
    308         !fi.end();
    309         fi.next(), ++n) {
    310       Function *f = (Function *)fi.get();
    311       assert(n < nMax);
    312 
    313       info->bin.syms[n].label = f->getLabel();
    314       info->bin.syms[n].offset = f->binPos;
    315    }
    316 
    317    info->bin.numSyms = n;
    318 }
    319 
    320 bool
    321 Program::emitBinary(struct nv50_ir_prog_info *info)
    322 {
    323    CodeEmitter *emit = target->getCodeEmitter(progType);
    324 
    325    emit->prepareEmission(this);
    326 
    327    if (dbgFlags & NV50_IR_DEBUG_BASIC)
    328       this->print();
    329 
    330    if (!binSize) {
    331       code = NULL;
    332       return false;
    333    }
    334    code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
    335    if (!code)
    336       return false;
    337    emit->setCodeLocation(code, binSize);
    338 
    339    for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
    340       Function *fn = reinterpret_cast<Function *>(fi.get());
    341 
    342       assert(emit->getCodeSize() == fn->binPos);
    343 
    344       for (int b = 0; b < fn->bbCount; ++b)
    345          for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next)
    346             emit->emitInstruction(i);
    347    }
    348    info->bin.relocData = emit->getRelocInfo();
    349 
    350    emitSymbolTable(info);
    351 
    352    // the nvc0 driver will print the binary iself together with the header
    353    if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
    354       emit->printBinary();
    355 
    356    delete emit;
    357    return true;
    358 }
    359 
    360 #define RELOC_ALLOC_INCREMENT 8
    361 
    362 bool
    363 CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
    364                       int s)
    365 {
    366    unsigned int n = relocInfo ? relocInfo->count : 0;
    367 
    368    if (!(n % RELOC_ALLOC_INCREMENT)) {
    369       size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
    370       relocInfo = reinterpret_cast<RelocInfo *>(
    371          REALLOC(relocInfo, n ? size : 0,
    372                  size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
    373       if (!relocInfo)
    374          return false;
    375       if (n == 0)
    376          memset(relocInfo, 0, sizeof(RelocInfo));
    377    }
    378    ++relocInfo->count;
    379 
    380    relocInfo->entry[n].data = data;
    381    relocInfo->entry[n].mask = m;
    382    relocInfo->entry[n].offset = codeSize + w * 4;
    383    relocInfo->entry[n].bitPos = s;
    384    relocInfo->entry[n].type = ty;
    385 
    386    return true;
    387 }
    388 
    389 void
    390 RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
    391 {
    392    uint32_t value = 0;
    393 
    394    switch (type) {
    395    case TYPE_CODE: value = info->codePos; break;
    396    case TYPE_BUILTIN: value = info->libPos; break;
    397    case TYPE_DATA: value = info->dataPos; break;
    398    default:
    399       assert(0);
    400       break;
    401    }
    402    value += data;
    403    value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
    404 
    405    binary[offset / 4] &= ~mask;
    406    binary[offset / 4] |= value & mask;
    407 }
    408 
    409 } // namespace nv50_ir
    410 
    411 
    412 #include "nv50/codegen/nv50_ir_driver.h"
    413 
    414 extern "C" {
    415 
    416 void
    417 nv50_ir_relocate_code(void *relocData, uint32_t *code,
    418                       uint32_t codePos,
    419                       uint32_t libPos,
    420                       uint32_t dataPos)
    421 {
    422    nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
    423 
    424    info->codePos = codePos;
    425    info->libPos = libPos;
    426    info->dataPos = dataPos;
    427 
    428    for (unsigned int i = 0; i < info->count; ++i)
    429       info->entry[i].apply(code, info);
    430 }
    431 
    432 void
    433 nv50_ir_get_target_library(uint32_t chipset,
    434                            const uint32_t **code, uint32_t *size)
    435 {
    436    nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
    437    targ->getBuiltinCode(code, size);
    438    nv50_ir::Target::destroy(targ);
    439 }
    440 
    441 }
    442