Home | History | Annotate | Download | only in codegen
      1 /*
      2  * Copyright 2011 Christoph Bumiller
      3  *           2014 Red Hat Inc.
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the "Software"),
      7  * to deal in the Software without restriction, including without limitation
      8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9  * and/or sell copies of the Software, and to permit persons to whom the
     10  * Software is furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice shall be included in
     13  * all copies or substantial portions of the Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     21  * OTHER DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #include "codegen/nv50_ir_target_gm107.h"
     25 #include "codegen/nv50_ir_lowering_gm107.h"
     26 
     27 namespace nv50_ir {
     28 
     29 Target *getTargetGM107(unsigned int chipset)
     30 {
     31    return new TargetGM107(chipset);
     32 }
     33 
// BUILTINS / LIBRARY FUNCTIONS:
     35 
// laziness -> everything is simply hardcoded for the time being
     37 
     38 #include "lib/gm107.asm.h"
     39 
     40 void
     41 TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const
     42 {
     43    *code = (const uint32_t *)&gm107_builtin_code[0];
     44    *size = sizeof(gm107_builtin_code);
     45 }
     46 
     47 uint32_t
     48 TargetGM107::getBuiltinOffset(int builtin) const
     49 {
     50    assert(builtin < NVC0_BUILTIN_COUNT);
     51    return gm107_builtin_offsets[builtin];
     52 }
     53 
     54 bool
     55 TargetGM107::isOpSupported(operation op, DataType ty) const
     56 {
     57    switch (op) {
     58    case OP_SAD:
     59    case OP_POW:
     60    case OP_SQRT:
     61    case OP_DIV:
     62    case OP_MOD:
     63       return false;
     64    default:
     65       break;
     66    }
     67 
     68    return true;
     69 }
     70 
     71 // Return true when an instruction supports the reuse flag. When supported, the
     72 // hardware will use the operand reuse cache introduced since Maxwell, which
     73 // should try to reduce bank conflicts by caching values for the subsequent
     74 // instructions. Note that the next instructions have to use the same GPR id in
     75 // the same operand slot.
     76 bool
     77 TargetGM107::isReuseSupported(const Instruction *insn) const
     78 {
     79    const OpClass cl = getOpClass(insn->op);
     80 
     81    // TODO: double-check!
     82    switch (cl) {
     83    case OPCLASS_ARITH:
     84    case OPCLASS_COMPARE:
     85    case OPCLASS_LOGIC:
     86    case OPCLASS_MOVE:
     87    case OPCLASS_SHIFT:
     88       return true;
     89    case OPCLASS_BITFIELD:
     90       if (insn->op == OP_INSBF || insn->op == OP_EXTBF)
     91          return true;
     92       break;
     93    default:
     94       break;
     95    }
     96    return false;
     97 }
     98 
     99 // Return true when an instruction requires to set up a barrier because it
    100 // doesn't operate at a fixed latency. Variable latency instructions are memory
    101 // operations, double precision operations, special function unit operations
    102 // and other low throughput instructions.
    103 bool
    104 TargetGM107::isBarrierRequired(const Instruction *insn) const
    105 {
    106    const OpClass cl = getOpClass(insn->op);
    107 
    108    if (insn->dType == TYPE_F64 || insn->sType == TYPE_F64)
    109       return true;
    110 
    111    switch (cl) {
    112    case OPCLASS_ATOMIC:
    113    case OPCLASS_LOAD:
    114    case OPCLASS_STORE:
    115    case OPCLASS_SURFACE:
    116    case OPCLASS_TEXTURE:
    117       return true;
    118    case OPCLASS_SFU:
    119       switch (insn->op) {
    120       case OP_COS:
    121       case OP_EX2:
    122       case OP_LG2:
    123       case OP_LINTERP:
    124       case OP_PINTERP:
    125       case OP_RCP:
    126       case OP_RSQ:
    127       case OP_SIN:
    128          return true;
    129       default:
    130          break;
    131       }
    132       break;
    133    case OPCLASS_BITFIELD:
    134       switch (insn->op) {
    135       case OP_BFIND:
    136       case OP_POPCNT:
    137          return true;
    138       default:
    139          break;
    140       }
    141       break;
    142    case OPCLASS_CONTROL:
    143       switch (insn->op) {
    144       case OP_EMIT:
    145       case OP_RESTART:
    146          return true;
    147       default:
    148          break;
    149       }
    150       break;
    151    case OPCLASS_OTHER:
    152       switch (insn->op) {
    153       case OP_AFETCH:
    154       case OP_PFETCH:
    155       case OP_PIXLD:
    156       case OP_RDSV:
    157       case OP_SHFL:
    158          return true;
    159       default:
    160          break;
    161       }
    162       break;
    163    case OPCLASS_ARITH:
    164       // TODO: IMUL/IMAD require barriers too, use of XMAD instead!
    165       if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
    166           !isFloatType(insn->dType))
    167          return true;
    168       break;
    169    case OPCLASS_CONVERT:
    170       if (insn->def(0).getFile() != FILE_PREDICATE &&
    171           insn->src(0).getFile() != FILE_PREDICATE)
    172          return true;
    173       break;
    174    default:
    175       break;
    176    }
    177    return false;
    178 }
    179 
    180 bool
    181 TargetGM107::canDualIssue(const Instruction *a, const Instruction *b) const
    182 {
    183    // TODO
    184    return false;
    185 }
    186 
    187 // Return the number of stall counts needed to complete a single instruction.
    188 // On Maxwell GPUs, the pipeline depth is 6, but some instructions require
    189 // different number of stall counts like memory operations.
    190 int
    191 TargetGM107::getLatency(const Instruction *insn) const
    192 {
    193    // TODO: better values! This should be good enough for now though.
    194    switch (insn->op) {
    195    case OP_EMIT:
    196    case OP_EXPORT:
    197    case OP_PIXLD:
    198    case OP_RESTART:
    199    case OP_STORE:
    200    case OP_SUSTB:
    201    case OP_SUSTP:
    202       return 1;
    203    case OP_SHFL:
    204       return 2;
    205    case OP_ADD:
    206    case OP_AND:
    207    case OP_EXTBF:
    208    case OP_FMA:
    209    case OP_INSBF:
    210    case OP_MAD:
    211    case OP_MAX:
    212    case OP_MIN:
    213    case OP_MOV:
    214    case OP_MUL:
    215    case OP_NOT:
    216    case OP_OR:
    217    case OP_PREEX2:
    218    case OP_PRESIN:
    219    case OP_QUADOP:
    220    case OP_SELP:
    221    case OP_SET:
    222    case OP_SET_AND:
    223    case OP_SET_OR:
    224    case OP_SET_XOR:
    225    case OP_SHL:
    226    case OP_SHLADD:
    227    case OP_SHR:
    228    case OP_SLCT:
    229    case OP_SUB:
    230    case OP_VOTE:
    231    case OP_XOR:
    232       if (insn->dType != TYPE_F64)
    233          return 6;
    234       break;
    235    case OP_ABS:
    236    case OP_CEIL:
    237    case OP_CVT:
    238    case OP_FLOOR:
    239    case OP_NEG:
    240    case OP_SAT:
    241    case OP_TRUNC:
    242       if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
    243                                  insn->src(0).getFile() == FILE_PREDICATE))
    244          return 6;
    245       break;
    246    case OP_BFIND:
    247    case OP_COS:
    248    case OP_EX2:
    249    case OP_LG2:
    250    case OP_POPCNT:
    251    case OP_QUADON:
    252    case OP_QUADPOP:
    253    case OP_RCP:
    254    case OP_RSQ:
    255    case OP_SIN:
    256       return 13;
    257    default:
    258       break;
    259    }
    260    // Use the maximum number of stall counts for other instructions.
    261    return 15;
    262 }
    263 
    264 // Return the operand read latency which is the number of stall counts before
    265 // an instruction can read its sources. For memory operations like ATOM, LOAD
    266 // and STORE, the memory access has to be indirect.
    267 int
    268 TargetGM107::getReadLatency(const Instruction *insn) const
    269 {
    270    switch (insn->op) {
    271    case OP_ABS:
    272    case OP_BFIND:
    273    case OP_CEIL:
    274    case OP_COS:
    275    case OP_EX2:
    276    case OP_FLOOR:
    277    case OP_LG2:
    278    case OP_NEG:
    279    case OP_POPCNT:
    280    case OP_RCP:
    281    case OP_RSQ:
    282    case OP_SAT:
    283    case OP_SIN:
    284    case OP_SULDB:
    285    case OP_SULDP:
    286    case OP_SUREDB:
    287    case OP_SUREDP:
    288    case OP_SUSTB:
    289    case OP_SUSTP:
    290    case OP_TRUNC:
    291       return 4;
    292    case OP_CVT:
    293       if (insn->def(0).getFile() != FILE_PREDICATE &&
    294           insn->src(0).getFile() != FILE_PREDICATE)
    295          return 4;
    296       break;
    297    case OP_ATOM:
    298    case OP_LOAD:
    299    case OP_STORE:
    300       if (insn->src(0).isIndirect(0)) {
    301          switch (insn->src(0).getFile()) {
    302          case FILE_MEMORY_SHARED:
    303          case FILE_MEMORY_CONST:
    304             return 2;
    305          case FILE_MEMORY_GLOBAL:
    306          case FILE_MEMORY_LOCAL:
    307             return 4;
    308          default:
    309             break;
    310          }
    311       }
    312       break;
    313    case OP_EXPORT:
    314    case OP_PFETCH:
    315    case OP_SHFL:
    316    case OP_VFETCH:
    317       return 2;
    318    default:
    319       break;
    320    }
    321    return 0;
    322 }
    323 
    324 bool
    325 TargetGM107::runLegalizePass(Program *prog, CGStage stage) const
    326 {
    327    if (stage == CG_STAGE_PRE_SSA) {
    328       GM107LoweringPass pass(prog);
    329       return pass.run(prog, false, true);
    330    } else
    331    if (stage == CG_STAGE_POST_RA) {
    332       NVC0LegalizePostRA pass(prog);
    333       return pass.run(prog, false, true);
    334    } else
    335    if (stage == CG_STAGE_SSA) {
    336       GM107LegalizeSSA pass;
    337       return pass.run(prog, false, true);
    338    }
    339    return false;
    340 }
    341 
    342 CodeEmitter *
    343 TargetGM107::getCodeEmitter(Program::Type type)
    344 {
    345    return createCodeEmitterGM107(type);
    346 }
    347 
    348 } // namespace nv50_ir
    349