Home | History | Annotate | Download | only in codegen
      1 /*
      2  * Copyright 2011 Christoph Bumiller
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20  * OTHER DEALINGS IN THE SOFTWARE.
     21  */
     22 
     23 #include "codegen/nv50_ir.h"
     24 #include "codegen/nv50_ir_target.h"
     25 
     26 namespace nv50_ir {
     27 
     28 const uint8_t Target::operationSrcNr[] =
     29 {
     30    0, 0,                   // NOP, PHI
     31    0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
     32    1, 1, 2,                // MOV, LOAD, STORE
     33    2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, SHLADD
     34    1, 1, 1,                // ABS, NEG, NOT
     35    2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
     36    2, 2, 1,                // MAX, MIN, SAT
     37    1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
     38    3, 3, 3, 2, 3, 3,       // SET_AND,OR,XOR, SET, SELP, SLCT
     39    1, 1, 1, 1, 1, 1,       // RCP, RSQ, LG2, SIN, COS, EX2
     40    1, 1, 1, 1, 1, 2,       // EXP, LOG, PRESIN, PREEX2, SQRT, POW
     41    0, 0, 0, 0, 0,          // BRA, CALL, RET, CONT, BREAK,
     42    0, 0, 0,                // PRERET,CONT,BREAK
     43    0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
     44    1, 1, 1, 2, 1, 2,       // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
     45    1, 1,                   // EMIT, RESTART
     46    1, 1, 1,                // TEX, TXB, TXL,
     47    1, 1, 1, 1, 1, 1, 2,    // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
     48    1, 1, 2, 2, 2, 2, 2,    // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
     49    3, 3, 3, 1, 3,          // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
     50    0,                      // TEXBAR
     51    1, 1,                   // DFDX, DFDY
     52    1, 2, 1, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
     53    2, 3, 2, 1, 3,          // POPCNT, INSBF, EXTBF, BFIND, PERMT
     54    2, 2,                   // ATOM, BAR
     55    2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
     56    2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
     57    3,                      // SHFL
     58    1,                      // VOTE
     59    1,                      // BUFQ
     60    0
     61 };
     62 
     63 const OpClass Target::operationClass[] =
     64 {
     65    // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
     66    OPCLASS_OTHER,
     67    OPCLASS_PSEUDO,
     68    OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
     69    // MOV; LOAD; STORE
     70    OPCLASS_MOVE,
     71    OPCLASS_LOAD,
     72    OPCLASS_STORE,
     73    // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD
     74    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
     75    OPCLASS_ARITH, OPCLASS_ARITH,
     76    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
     77    // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
     78    OPCLASS_CONVERT, OPCLASS_CONVERT,
     79    OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
     80    OPCLASS_SHIFT, OPCLASS_SHIFT,
     81    // MAX, MIN
     82    OPCLASS_COMPARE, OPCLASS_COMPARE,
     83    // SAT, CEIL, FLOOR, TRUNC; CVT
     84    OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
     85    OPCLASS_CONVERT,
     86    // SET(AND,OR,XOR); SELP, SLCT
     87    OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
     88    OPCLASS_COMPARE, OPCLASS_COMPARE,
     89    // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
     90    OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
     91    OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
     92    OPCLASS_SFU, OPCLASS_SFU,
     93    // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
     94    OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
     95    OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
     96    OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
     97    // DISCARD, EXIT
     98    OPCLASS_FLOW, OPCLASS_FLOW,
     99    // MEMBAR
    100    OPCLASS_CONTROL,
    101    // VFETCH, PFETCH, AFETCH, EXPORT
    102    OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,
    103    // LINTERP, PINTERP
    104    OPCLASS_SFU, OPCLASS_SFU,
    105    // EMIT, RESTART
    106    OPCLASS_CONTROL, OPCLASS_CONTROL,
    107    // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
    108    OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
    109    OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
    110    OPCLASS_TEXTURE, OPCLASS_TEXTURE,
    111    // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
    112    OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
    113    OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
    114    // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
    115    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
    116    // TEXBAR
    117    OPCLASS_OTHER,
    118    // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
    119    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
    120    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
    121    // POPCNT, INSBF, EXTBF, BFIND; PERMT
    122    OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
    123    OPCLASS_BITFIELD,
    124    // ATOM, BAR
    125    OPCLASS_ATOMIC, OPCLASS_CONTROL,
    126    // VADD, VAVG, VMIN, VMAX
    127    OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
    128    // VSAD, VSET, VSHR, VSHL
    129    OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
    130    // VSEL, CCTL
    131    OPCLASS_VECTOR, OPCLASS_CONTROL,
    132    // SHFL
    133    OPCLASS_OTHER,
    134    // VOTE
    135    OPCLASS_OTHER,
    136    // BUFQ
    137    OPCLASS_OTHER,
    138    OPCLASS_PSEUDO // LAST
    139 };
    140 
    141 
    142 extern Target *getTargetGM107(unsigned int chipset);
    143 extern Target *getTargetNVC0(unsigned int chipset);
    144 extern Target *getTargetNV50(unsigned int chipset);
    145 
    146 Target *Target::create(unsigned int chipset)
    147 {
    148    STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1);
    149    STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1);
    150    switch (chipset & ~0xf) {
    151    case 0x110:
    152    case 0x120:
    153    case 0x130:
    154       return getTargetGM107(chipset);
    155    case 0xc0:
    156    case 0xd0:
    157    case 0xe0:
    158    case 0xf0:
    159    case 0x100:
    160       return getTargetNVC0(chipset);
    161    case 0x50:
    162    case 0x80:
    163    case 0x90:
    164    case 0xa0:
    165       return getTargetNV50(chipset);
    166    default:
    167       ERROR("unsupported target: NV%x\n", chipset);
    168       return 0;
    169    }
    170 }
    171 
    172 void Target::destroy(Target *targ)
    173 {
    174    delete targ;
    175 }
    176 
    177 CodeEmitter::CodeEmitter(const Target *target) : targ(target), fixupInfo(NULL)
    178 {
    179 }
    180 
    181 void
    182 CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
    183 {
    184    code = reinterpret_cast<uint32_t *>(ptr);
    185    codeSize = 0;
    186    codeSizeLimit = size;
    187 }
    188 
    189 void
    190 CodeEmitter::printBinary() const
    191 {
    192    uint32_t *bin = code - codeSize / 4;
    193    INFO("program binary (%u bytes)", codeSize);
    194    for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
    195       if ((pos % 8) == 0)
    196          INFO("\n");
    197       INFO("%08x ", bin[pos]);
    198    }
    199    INFO("\n");
    200 }
    201 
    202 static inline uint32_t sizeToBundlesNVE4(uint32_t size)
    203 {
    204    return (size + 55) / 56;
    205 }
    206 
    207 void
    208 CodeEmitter::prepareEmission(Program *prog)
    209 {
    210    for (ArrayList::Iterator fi = prog->allFuncs.iterator();
    211         !fi.end(); fi.next()) {
    212       Function *func = reinterpret_cast<Function *>(fi.get());
    213       func->binPos = prog->binSize;
    214       prepareEmission(func);
    215 
    216       // adjust sizes & positions for schedulding info:
    217       if (prog->getTarget()->hasSWSched) {
    218          uint32_t adjPos = func->binPos;
    219          BasicBlock *bb = NULL;
    220          for (int i = 0; i < func->bbCount; ++i) {
    221             bb = func->bbArray[i];
    222             int32_t adjSize = bb->binSize;
    223             if (adjPos % 64) {
    224                adjSize -= 64 - adjPos % 64;
    225                if (adjSize < 0)
    226                   adjSize = 0;
    227             }
    228             adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
    229             bb->binPos = adjPos;
    230             bb->binSize = adjSize;
    231             adjPos += adjSize;
    232          }
    233          if (bb)
    234             func->binSize = adjPos - func->binPos;
    235       }
    236 
    237       prog->binSize += func->binSize;
    238    }
    239 }
    240 
    241 void
    242 CodeEmitter::prepareEmission(Function *func)
    243 {
    244    func->bbCount = 0;
    245    func->bbArray = new BasicBlock * [func->cfg.getSize()];
    246 
    247    BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
    248 
    249    for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
    250       prepareEmission(BasicBlock::get(*it));
    251 }
    252 
    253 void
    254 CodeEmitter::prepareEmission(BasicBlock *bb)
    255 {
    256    Instruction *i, *next;
    257    Function *func = bb->getFunction();
    258    int j;
    259    unsigned int nShort;
    260 
    261    for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
    262 
    263    for (; j >= 0; --j) {
    264       BasicBlock *in = func->bbArray[j];
    265       Instruction *exit = in->getExit();
    266 
    267       if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
    268          in->binSize -= 8;
    269          func->binSize -= 8;
    270 
    271          for (++j; j < func->bbCount; ++j)
    272             func->bbArray[j]->binPos -= 8;
    273 
    274          in->remove(exit);
    275       }
    276       bb->binPos = in->binPos + in->binSize;
    277       if (in->binSize) // no more no-op branches to bb
    278          break;
    279    }
    280    func->bbArray[func->bbCount++] = bb;
    281 
    282    if (!bb->getExit())
    283       return;
    284 
    285    // determine encoding size, try to group short instructions
    286    nShort = 0;
    287    for (i = bb->getEntry(); i; i = next) {
    288       next = i->next;
    289 
    290       if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) {
    291          bb->remove(i);
    292          continue;
    293       }
    294 
    295       i->encSize = getMinEncodingSize(i);
    296       if (next && i->encSize < 8)
    297          ++nShort;
    298       else
    299       if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
    300          if (i->isCommutationLegal(i->next)) {
    301             bb->permuteAdjacent(i, next);
    302             next->encSize = 4;
    303             next = i;
    304             i = i->prev;
    305             ++nShort;
    306          } else
    307          if (i->isCommutationLegal(i->prev) && next->next) {
    308             bb->permuteAdjacent(i->prev, i);
    309             next->encSize = 4;
    310             next = next->next;
    311             bb->binSize += 4;
    312             ++nShort;
    313          } else {
    314             i->encSize = 8;
    315             i->prev->encSize = 8;
    316             bb->binSize += 4;
    317             nShort = 0;
    318          }
    319       } else {
    320          i->encSize = 8;
    321          if (nShort & 1) {
    322             i->prev->encSize = 8;
    323             bb->binSize += 4;
    324          }
    325          nShort = 0;
    326       }
    327       bb->binSize += i->encSize;
    328    }
    329 
    330    if (bb->getExit()->encSize == 4) {
    331       assert(nShort);
    332       bb->getExit()->encSize = 8;
    333       bb->binSize += 4;
    334 
    335       if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
    336          bb->binSize += 8;
    337          bb->getExit()->prev->encSize = 8;
    338       }
    339    }
    340    assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));
    341 
    342    func->binSize += bb->binSize;
    343 }
    344 
    345 void
    346 Program::emitSymbolTable(struct nv50_ir_prog_info *info)
    347 {
    348    unsigned int n = 0, nMax = allFuncs.getSize();
    349 
    350    info->bin.syms =
    351       (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
    352 
    353    for (ArrayList::Iterator fi = allFuncs.iterator();
    354         !fi.end();
    355         fi.next(), ++n) {
    356       Function *f = (Function *)fi.get();
    357       assert(n < nMax);
    358 
    359       info->bin.syms[n].label = f->getLabel();
    360       info->bin.syms[n].offset = f->binPos;
    361    }
    362 
    363    info->bin.numSyms = n;
    364 }
    365 
    366 bool
    367 Program::emitBinary(struct nv50_ir_prog_info *info)
    368 {
    369    CodeEmitter *emit = target->getCodeEmitter(progType);
    370 
    371    emit->prepareEmission(this);
    372 
    373    if (dbgFlags & NV50_IR_DEBUG_BASIC)
    374       this->print();
    375 
    376    if (!binSize) {
    377       code = NULL;
    378       return false;
    379    }
    380    code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
    381    if (!code)
    382       return false;
    383    emit->setCodeLocation(code, binSize);
    384    info->bin.instructions = 0;
    385 
    386    for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
    387       Function *fn = reinterpret_cast<Function *>(fi.get());
    388 
    389       assert(emit->getCodeSize() == fn->binPos);
    390 
    391       for (int b = 0; b < fn->bbCount; ++b) {
    392          for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
    393             emit->emitInstruction(i);
    394             info->bin.instructions++;
    395             if (i->sType == TYPE_F64 || i->dType == TYPE_F64)
    396                info->io.fp64 = true;
    397          }
    398       }
    399    }
    400    info->bin.relocData = emit->getRelocInfo();
    401    info->bin.fixupData = emit->getFixupInfo();
    402 
    403    emitSymbolTable(info);
    404 
    405    // the nvc0 driver will print the binary iself together with the header
    406    if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
    407       emit->printBinary();
    408 
    409    delete emit;
    410    return true;
    411 }
    412 
    413 #define RELOC_ALLOC_INCREMENT 8
    414 
    415 bool
    416 CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
    417                       int s)
    418 {
    419    unsigned int n = relocInfo ? relocInfo->count : 0;
    420 
    421    if (!(n % RELOC_ALLOC_INCREMENT)) {
    422       size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
    423       relocInfo = reinterpret_cast<RelocInfo *>(
    424          REALLOC(relocInfo, n ? size : 0,
    425                  size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
    426       if (!relocInfo)
    427          return false;
    428       if (n == 0)
    429          memset(relocInfo, 0, sizeof(RelocInfo));
    430    }
    431    ++relocInfo->count;
    432 
    433    relocInfo->entry[n].data = data;
    434    relocInfo->entry[n].mask = m;
    435    relocInfo->entry[n].offset = codeSize + w * 4;
    436    relocInfo->entry[n].bitPos = s;
    437    relocInfo->entry[n].type = ty;
    438 
    439    return true;
    440 }
    441 
    442 bool
    443 CodeEmitter::addInterp(int ipa, int reg, FixupApply apply)
    444 {
    445    unsigned int n = fixupInfo ? fixupInfo->count : 0;
    446 
    447    if (!(n % RELOC_ALLOC_INCREMENT)) {
    448       size_t size = sizeof(FixupInfo) + n * sizeof(FixupEntry);
    449       fixupInfo = reinterpret_cast<FixupInfo *>(
    450          REALLOC(fixupInfo, n ? size : 0,
    451                  size + RELOC_ALLOC_INCREMENT * sizeof(FixupEntry)));
    452       if (!fixupInfo)
    453          return false;
    454       if (n == 0)
    455          memset(fixupInfo, 0, sizeof(FixupInfo));
    456    }
    457    ++fixupInfo->count;
    458 
    459    fixupInfo->entry[n] = FixupEntry(apply, ipa, reg, codeSize >> 2);
    460 
    461    return true;
    462 }
    463 
    464 void
    465 RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
    466 {
    467    uint32_t value = 0;
    468 
    469    switch (type) {
    470    case TYPE_CODE: value = info->codePos; break;
    471    case TYPE_BUILTIN: value = info->libPos; break;
    472    case TYPE_DATA: value = info->dataPos; break;
    473    default:
    474       assert(0);
    475       break;
    476    }
    477    value += data;
    478    value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
    479 
    480    binary[offset / 4] &= ~mask;
    481    binary[offset / 4] |= value & mask;
    482 }
    483 
    484 } // namespace nv50_ir
    485 
    486 
    487 #include "codegen/nv50_ir_driver.h"
    488 
    489 extern "C" {
    490 
    491 void
    492 nv50_ir_relocate_code(void *relocData, uint32_t *code,
    493                       uint32_t codePos,
    494                       uint32_t libPos,
    495                       uint32_t dataPos)
    496 {
    497    nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
    498 
    499    info->codePos = codePos;
    500    info->libPos = libPos;
    501    info->dataPos = dataPos;
    502 
    503    for (unsigned int i = 0; i < info->count; ++i)
    504       info->entry[i].apply(code, info);
    505 }
    506 
    507 void
    508 nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
    509                      bool force_persample_interp, bool flatshade,
    510                      uint8_t alphatest)
    511 {
    512    nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>(
    513       fixupData);
    514 
    515    // force_persample_interp: all non-flat -> per-sample
    516    // flatshade: all color -> flat
    517    // alphatest: PIPE_FUNC_* to use with alphatest
    518    nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest);
    519    for (unsigned i = 0; i < info->count; ++i)
    520       info->entry[i].apply(&info->entry[i], code, data);
    521 }
    522 
    523 void
    524 nv50_ir_get_target_library(uint32_t chipset,
    525                            const uint32_t **code, uint32_t *size)
    526 {
    527    nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
    528    targ->getBuiltinCode(code, size);
    529    nv50_ir::Target::destroy(targ);
    530 }
    531 
    532 }
    533