Home | History | Annotate | Download | only in codegen
      1 /*
      2  * Copyright 2011 Christoph Bumiller
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
     18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
     19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     20  * SOFTWARE.
     21  */
     22 
     23 #include "nv50_ir_target_nvc0.h"
     24 
     25 namespace nv50_ir {
     26 
     27 Target *getTargetNVC0(unsigned int chipset)
     28 {
     29    return new TargetNVC0(chipset);
     30 }
     31 
     32 TargetNVC0::TargetNVC0(unsigned int card) : Target(false, card >= 0xe4)
     33 {
     34    chipset = card;
     35    initOpInfo();
     36 }
     37 
// BUILTINS / LIBRARY FUNCTIONS:
     39 
// laziness -> will just hardcode everything for the time being
     41 
     42 // Will probably make this nicer once we support subroutines properly,
     43 // i.e. when we have an input IR that provides function declarations.
     44 
     45 // TODO: separate version for nve4+ which doesn't like the 4-byte insn formats
// Pre-assembled machine code of the builtin routines, stored as raw 32-bit
// words. The routines are laid out back to back; the byte offset of each one
// is listed in nvc0_builtin_offsets, and getBuiltinCode() hands out the whole
// array.
//
// Lines holding two words are 8-byte instructions. Lines holding a single
// word are 4-byte encodings; the SIZE figures below count two of those as one
// 8-byte slot.
// NOTE(review): the trailing "// 0x..." remarks on some lines look like the
// alternative 4-byte encodings of the same instruction - confirm.
static const uint32_t nvc0_builtin_code[] =
{
// DIV U32: slow unsigned integer division
//
// UNR recurrence (q = a / b):
// look for z such that 2^32 - b <= b * z < 2^32
// then q - 1 <= (a * z) / 2^32 <= q
//
// INPUT:   $r0: dividend, $r1: divisor
// OUTPUT:  $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p1
// SIZE:    26 / 14 * 8 bytes (#if 1 variant / #else variant); the offsets
//          table assumes the 26-slot variant
//
#if 1
   0x04009c03, 0x78000000,
   0x7c209c82, 0x38000000, // 0x7c209cdd,
   0x0400dde2, 0x18000000, // 0x0010dd18,
   0x08309c03, 0x60000000,
   0x05205d04, 0x1c000000, // 0x05605c18,
   0x0810dc03, 0x50000000, // 0x0810dc2a,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0000dde4, 0x28000000,
   0x08001c43, 0x50000000,
   0x05209d04, 0x1c000000, // 0x05609c18,
   0x00105c03, 0x20060000, // 0x0010430d,
   0x0811dc03, 0x1b0e0000,
   0x08104103, 0x48000000,
   0x04000002, 0x08000000,
   0x0811c003, 0x1b0e0000,
   0x08104103, 0x48000000,
   0x04000002, 0x08000000, // 0x040000ac,
   0x00001de7, 0x90000000, // 0x90001dff,
#else
   0x0401dc03, 0x1b0e0000,
   0x00008003, 0x78000000,
   0x0400c003, 0x78000000,
   0x0c20c103, 0x48000000,
   0x0c108003, 0x60000000,
   0x00005c28,
   0x00001d18,
   0x0031c023, 0x1b0ec000,
   0xb000a1e7, 0x40000000,
   0x04000003, 0x6000c000,
   0x0813dc03, 0x1b000000,
   0x0420446c,
   0x040004bd,
   0x04208003, 0x5800c000,
   0x0430c103, 0x4800c000,
   0x0ffc5dff,
   0x90001dff,
#endif

// DIV S32: slow signed integer division
//
// INPUT:   $r0: dividend, $r1: divisor
// OUTPUT:  $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p3
// SIZE:    23 * 8 bytes
//
   0xfc05dc23, 0x188e0000,
   0xfc17dc23, 0x18c40000,
   0x01201ec4, 0x1c000000, // 0x03301e18,
   0x05205ec4, 0x1c000000, // 0x07305e18,
   0x0401dc03, 0x1b0e0000,
   0x00008003, 0x78000000,
   0x0400c003, 0x78000000,
   0x0c20c103, 0x48000000,
   0x0c108003, 0x60000000,
   0x00005de4, 0x28000000, // 0x00005c28,
   0x00001de2, 0x18000000, // 0x00001d18,
   0x0031c023, 0x1b0ec000,
   0xe000a1e7, 0x40000000, // 0xb000a1e7, 0x40000000,
   0x04000003, 0x6000c000,
   0x0813dc03, 0x1b000000,
   0x04204603, 0x48000000, // 0x0420446c,
   0x04000442, 0x38000000, // 0x040004bd,
   0x04208003, 0x5800c000,
   0x0430c103, 0x4800c000,
   0xe0001de7, 0x4003fffe, // 0x0ffc5dff,
   0x01200f84, 0x1c000000, // 0x01700e18,
   0x05204b84, 0x1c000000, // 0x05704a18,
   0x00001de7, 0x90000000, // 0x90001dff,

// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
//
// INPUT:   $r0d (x)
// OUTPUT:  $r0d (rcp(x))
// CLOBBER: $r2 - $r7
// SIZE:    9 * 8 bytes
//
   0x9810dc08,
   0x00009c28,
   0x4001df18,
   0x00019d18,
   0x08011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08011e01, 0x200c0000,
   0x10201c01, 0x50000000,
   0x00001de7, 0x90000000,

// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i)
//
// INPUT:   $r0d (x)
// OUTPUT:  $r0d (rsqrt(x))
// CLOBBER: $r2 - $r7
// SIZE:    14 * 8 bytes
//
   0x9c10dc08,
   0x00009c28,
   0x00019d18,
   0x3fe1df18,
   0x18001c01, 0x50000000,
   0x0001dde2, 0x18ffe000,
   0x08211c01, 0x50000000,
   0x10011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08211c01, 0x50000000,
   0x10011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08211c01, 0x50000000,
   0x10011e01, 0x200c0000,
   0x10201c01, 0x50000000,
   0x00001de7, 0x90000000,
};
    180 
    181 static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] =
    182 {
    183    0,
    184    8 * (26),
    185    8 * (26 + 23),
    186    8 * (26 + 23 + 9)
    187 };
    188 
    189 void
    190 TargetNVC0::getBuiltinCode(const uint32_t **code, uint32_t *size) const
    191 {
    192    *code = &nvc0_builtin_code[0];
    193    *size = sizeof(nvc0_builtin_code);
    194 }
    195 
    196 uint32_t
    197 TargetNVC0::getBuiltinOffset(int builtin) const
    198 {
    199    assert(builtin < NVC0_BUILTIN_COUNT);
    200    return nvc0_builtin_offsets[builtin];
    201 }
    202 
// Compact per-operation description used by initOpInfo() to seed the opInfo
// table. In the per-source masks, bit s refers to source operand s;
// initOpInfo() evaluates bits 0..2.
struct opProperties
{
   operation op;            // operation this row describes
   unsigned int mNeg   : 4; // sources that accept the NEG modifier
   unsigned int mAbs   : 4; // sources that accept the ABS modifier
   unsigned int mNot   : 4; // sources that accept the NOT modifier
   unsigned int mSat   : 4; // 0x8 set: destination accepts the SAT modifier
   unsigned int fConst : 3; // sources that may come from FILE_MEMORY_CONST
   unsigned int fImmd  : 4; // sources that may be immediates; last bit indicates if full immediate is supported
};
    213 
// Modifier / source-file capabilities per operation, consumed by
// initOpInfo(). Operations without a row keep the defaults that initOpInfo()
// assigns before it applies this table.
// Columns follow the field order of struct opProperties; 0x8 in the "sat"
// column enables destination saturation and 0x8 in the "imm" column marks
// support for full 32-bit immediates.
static const struct opProperties _initProps[] =
{
   //           neg  abs  not  sat  c[]  imm
   { OP_ADD,    0x3, 0x3, 0x0, 0x8, 0x2, 0x2 | 0x8 },
   { OP_SUB,    0x3, 0x3, 0x0, 0x0, 0x2, 0x2 | 0x8 },
   { OP_MUL,    0x3, 0x0, 0x0, 0x8, 0x2, 0x2 | 0x8 },
   { OP_MAX,    0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   { OP_MIN,    0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   { OP_MAD,    0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint
   { OP_ABS,    0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
   { OP_NEG,    0x0, 0x1, 0x0, 0x0, 0x1, 0x0 },
   { OP_CVT,    0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
   { OP_CEIL,   0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
   { OP_FLOOR,  0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
   { OP_TRUNC,  0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
   { OP_AND,    0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
   { OP_OR,     0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
   { OP_XOR,    0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
   { OP_SHL,    0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
   { OP_SHR,    0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
   { OP_SET,    0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   { OP_SLCT,   0x4, 0x0, 0x0, 0x0, 0x6, 0x2 }, // special c[] constraint
   { OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
   { OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
   { OP_COS,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_SIN,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_EX2,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_LG2,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_RCP,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_RSQ,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
   { OP_DFDX,   0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
   { OP_DFDY,   0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
   { OP_CALL,   0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
   { OP_INSBF,  0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
   { OP_SET_AND, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   { OP_SET_OR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   { OP_SET_XOR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
   // saturate only:
   { OP_LINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
   { OP_PINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
};
    255 
    256 void TargetNVC0::initOpInfo()
    257 {
    258    unsigned int i, j;
    259 
    260    static const uint32_t commutative[(OP_LAST + 31) / 32] =
    261    {
    262       // ADD, MAD, MUL, AND, OR, XOR, MAX, MIN
    263       0x0670ca00, 0x0000003f, 0x00000000
    264    };
    265 
    266    static const uint32_t shortForm[(OP_LAST + 31) / 32] =
    267    {
    268       // ADD, MAD, MUL, AND, OR, XOR, PRESIN, PREEX2, SFN, CVT, PINTERP, MOV
    269       0x0670ca00, 0x00000000, 0x00000000
    270    };
    271 
    272    static const operation noDest[] =
    273    {
    274       OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT,
    275       OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET,
    276       OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART,
    277       OP_QUADON, OP_QUADPOP, OP_TEXBAR
    278    };
    279 
    280    for (i = 0; i < DATA_FILE_COUNT; ++i)
    281       nativeFileMap[i] = (DataFile)i;
    282    nativeFileMap[FILE_ADDRESS] = FILE_GPR;
    283 
    284    for (i = 0; i < OP_LAST; ++i) {
    285       opInfo[i].variants = NULL;
    286       opInfo[i].op = (operation)i;
    287       opInfo[i].srcTypes = 1 << (int)TYPE_F32;
    288       opInfo[i].dstTypes = 1 << (int)TYPE_F32;
    289       opInfo[i].immdBits = 0;
    290       opInfo[i].srcNr = operationSrcNr[i];
    291 
    292       for (j = 0; j < opInfo[i].srcNr; ++j) {
    293          opInfo[i].srcMods[j] = 0;
    294          opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR;
    295       }
    296       opInfo[i].dstMods = 0;
    297       opInfo[i].dstFiles = 1 << (int)FILE_GPR;
    298 
    299       opInfo[i].hasDest = 1;
    300       opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
    301       opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1;
    302       opInfo[i].pseudo = (i < OP_MOV);
    303       opInfo[i].predicate = !opInfo[i].pseudo;
    304       opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
    305       opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8;
    306    }
    307    for (i = 0; i < sizeof(noDest) / sizeof(noDest[0]); ++i)
    308       opInfo[noDest[i]].hasDest = 0;
    309 
    310    for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) {
    311       const struct opProperties *prop = &_initProps[i];
    312 
    313       for (int s = 0; s < 3; ++s) {
    314          if (prop->mNeg & (1 << s))
    315             opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NEG;
    316          if (prop->mAbs & (1 << s))
    317             opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_ABS;
    318          if (prop->mNot & (1 << s))
    319             opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NOT;
    320          if (prop->fConst & (1 << s))
    321             opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_CONST;
    322          if (prop->fImmd & (1 << s))
    323             opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_IMMEDIATE;
    324          if (prop->fImmd & 8)
    325             opInfo[prop->op].immdBits = 0xffffffff;
    326       }
    327       if (prop->mSat & 8)
    328          opInfo[prop->op].dstMods = NV50_IR_MOD_SAT;
    329    }
    330 }
    331 
    332 unsigned int
    333 TargetNVC0::getFileSize(DataFile file) const
    334 {
    335    switch (file) {
    336    case FILE_NULL:          return 0;
    337    case FILE_GPR:           return 63;
    338    case FILE_PREDICATE:     return 7;
    339    case FILE_FLAGS:         return 1;
    340    case FILE_ADDRESS:       return 0;
    341    case FILE_IMMEDIATE:     return 0;
    342    case FILE_MEMORY_CONST:  return 65536;
    343    case FILE_SHADER_INPUT:  return 0x400;
    344    case FILE_SHADER_OUTPUT: return 0x400;
    345    case FILE_MEMORY_GLOBAL: return 0xffffffff;
    346    case FILE_MEMORY_SHARED: return 16 << 10;
    347    case FILE_MEMORY_LOCAL:  return 48 << 10;
    348    case FILE_SYSTEM_VALUE:  return 32;
    349    default:
    350       assert(!"invalid file");
    351       return 0;
    352    }
    353 }
    354 
    355 unsigned int
    356 TargetNVC0::getFileUnit(DataFile file) const
    357 {
    358    if (file == FILE_GPR || file == FILE_ADDRESS || file == FILE_SYSTEM_VALUE)
    359       return 2;
    360    return 0;
    361 }
    362 
    363 uint32_t
    364 TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
    365 {
    366    const int idx = sym->reg.data.sv.index;
    367    const SVSemantic sv = sym->reg.data.sv.sv;
    368 
    369    const bool isInput = shaderFile == FILE_SHADER_INPUT;
    370 
    371    switch (sv) {
    372    case SV_POSITION:       return 0x070 + idx * 4;
    373    case SV_INSTANCE_ID:    return 0x2f8;
    374    case SV_VERTEX_ID:      return 0x2fc;
    375    case SV_PRIMITIVE_ID:   return isInput ? 0x060 : 0x040;
    376    case SV_LAYER:          return 0x064;
    377    case SV_VIEWPORT_INDEX: return 0x068;
    378    case SV_POINT_SIZE:     return 0x06c;
    379    case SV_CLIP_DISTANCE:  return 0x2c0 + idx * 4;
    380    case SV_POINT_COORD:    return 0x2e0 + idx * 4;
    381    case SV_FACE:           return 0x3fc;
    382    case SV_TESS_FACTOR:    return 0x000 + idx * 4;
    383    case SV_TESS_COORD:     return 0x2f0 + idx * 4;
    384    default:
    385       return 0xffffffff;
    386    }
    387 }
    388 
    389 bool
    390 TargetNVC0::insnCanLoad(const Instruction *i, int s,
    391                         const Instruction *ld) const
    392 {
    393    DataFile sf = ld->src(0).getFile();
    394 
    395    // immediate 0 can be represented by GPR $r63
    396    if (sf == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0)
    397       return (!i->asTex() && i->op != OP_EXPORT && i->op != OP_STORE);
    398 
    399    if (s >= opInfo[i->op].srcNr)
    400       return false;
    401    if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf)))
    402       return false;
    403 
    404    // indirect loads can only be done by OP_LOAD/VFETCH/INTERP on nvc0
    405    if (ld->src(0).isIndirect(0))
    406       return false;
    407 
    408    for (int k = 0; i->srcExists(k); ++k) {
    409       if (i->src(k).getFile() == FILE_IMMEDIATE) {
    410          if (i->getSrc(k)->reg.data.u64 != 0)
    411             return false;
    412       } else
    413       if (i->src(k).getFile() != FILE_GPR &&
    414           i->src(k).getFile() != FILE_PREDICATE) {
    415          return false;
    416       }
    417    }
    418 
    419    // not all instructions support full 32 bit immediates
    420    if (sf == FILE_IMMEDIATE) {
    421       Storage &reg = ld->getSrc(0)->asImm()->reg;
    422 
    423       if (opInfo[i->op].immdBits != 0xffffffff) {
    424          if (i->sType == TYPE_F32) {
    425             if (reg.data.u32 & 0xfff)
    426                return false;
    427          } else
    428          if (i->sType == TYPE_S32 || i->sType == TYPE_U32) {
    429             // with u32, 0xfffff counts as 0xffffffff as well
    430             if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000)
    431                return false;
    432          }
    433       } else
    434       if (i->op == OP_MAD || i->op == OP_FMA) {
    435          // requires src == dst, cannot decide before RA
    436          // (except if we implement more constraints)
    437          if (ld->getSrc(0)->asImm()->reg.data.u32 & 0xfff)
    438             return false;
    439       }
    440    }
    441 
    442    return true;
    443 }
    444 
    445 bool
    446 TargetNVC0::isAccessSupported(DataFile file, DataType ty) const
    447 {
    448    if (ty == TYPE_NONE)
    449       return false;
    450    if (file == FILE_MEMORY_CONST && getChipset() >= 0xe0) // wrong encoding ?
    451       return typeSizeof(ty) <= 8;
    452    if (ty == TYPE_B96)
    453       return (file == FILE_SHADER_INPUT) || (file == FILE_SHADER_OUTPUT);
    454    return true;
    455 }
    456 
    457 bool
    458 TargetNVC0::isOpSupported(operation op, DataType ty) const
    459 {
    460    if ((op == OP_MAD || op == OP_FMA) && (ty != TYPE_F32))
    461       return false;
    462    if (op == OP_SAD && ty != TYPE_S32 && ty != TYPE_U32)
    463       return false;
    464    if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD)
    465       return false;
    466    return true;
    467 }
    468 
    469 bool
    470 TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
    471 {
    472    if (!isFloatType(insn->dType)) {
    473       switch (insn->op) {
    474       case OP_ABS:
    475       case OP_NEG:
    476       case OP_CVT:
    477       case OP_CEIL:
    478       case OP_FLOOR:
    479       case OP_TRUNC:
    480       case OP_AND:
    481       case OP_OR:
    482       case OP_XOR:
    483          break;
    484       case OP_ADD:
    485          if (mod.abs())
    486             return false;
    487          if (insn->src(s ? 0 : 1).mod.neg())
    488             return false;
    489          break;
    490       case OP_SUB:
    491          if (s == 0)
    492             return insn->src(1).mod.neg() ? false : true;
    493          break;
    494       default:
    495          return false;
    496       }
    497    }
    498    if (s > 3)
    499       return false;
    500    return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
    501 }
    502 
    503 bool
    504 TargetNVC0::mayPredicate(const Instruction *insn, const Value *pred) const
    505 {
    506    if (insn->getPredicate())
    507       return false;
    508    return opInfo[insn->op].predicate;
    509 }
    510 
    511 bool
    512 TargetNVC0::isSatSupported(const Instruction *insn) const
    513 {
    514    if (insn->op == OP_CVT)
    515       return true;
    516    if (!(opInfo[insn->op].dstMods & NV50_IR_MOD_SAT))
    517       return false;
    518 
    519    if (insn->dType == TYPE_U32)
    520       return (insn->op == OP_ADD) || (insn->op == OP_MAD);
    521 
    522    return insn->dType == TYPE_F32;
    523 }
    524 
    525 bool
    526 TargetNVC0::isPostMultiplySupported(operation op, float f, int& e) const
    527 {
    528    if (op != OP_MUL)
    529       return false;
    530    f = fabsf(f);
    531    e = static_cast<int>(log2f(f));
    532    if (e < -3 || e > 3)
    533       return false;
    534    return f == exp2f(static_cast<float>(e));
    535 }
    536 
    537 // TODO: better values
    538 // this could be more precise, e.g. depending on the issue-to-read/write delay
    539 // of the depending instruction, but it's good enough
    540 int TargetNVC0::getLatency(const Instruction *i) const
    541 {
    542    if (chipset >= 0xe4) {
    543       if (i->dType == TYPE_F64 || i->sType == TYPE_F64)
    544          return 20;
    545       switch (i->op) {
    546       case OP_LINTERP:
    547       case OP_PINTERP:
    548          return 15;
    549       case OP_LOAD:
    550          if (i->src(0).getFile() == FILE_MEMORY_CONST)
    551             return 9;
    552          // fall through
    553       case OP_VFETCH:
    554          return 24;
    555       default:
    556          if (Target::getOpClass(i->op) == OPCLASS_TEXTURE)
    557             return 17;
    558          if (i->op == OP_MUL && i->dType != TYPE_F32)
    559             return 15;
    560          return 9;
    561       }
    562    } else {
    563       if (i->op == OP_LOAD) {
    564          if (i->cache == CACHE_CV)
    565             return 700;
    566          return 48;
    567       }
    568       return 24;
    569    }
    570    return 32;
    571 }
    572 
    573 // These are "inverse" throughput values, i.e. the number of cycles required
    574 // to issue a specific instruction for a full warp (32 threads).
    575 //
    576 // Assuming we have more than 1 warp in flight, a higher issue latency results
    577 // in a lower result latency since the MP will have spent more time with other
    578 // warps.
    579 // This also helps to determine the number of cycles between instructions in
    580 // a single warp.
    581 //
    582 int TargetNVC0::getThroughput(const Instruction *i) const
    583 {
    584    // TODO: better values
    585    if (i->dType == TYPE_F32) {
    586       switch (i->op) {
    587       case OP_ADD:
    588       case OP_MUL:
    589       case OP_MAD:
    590       case OP_FMA:
    591          return 1;
    592       case OP_CVT:
    593       case OP_CEIL:
    594       case OP_FLOOR:
    595       case OP_TRUNC:
    596       case OP_SET:
    597       case OP_SLCT:
    598       case OP_MIN:
    599       case OP_MAX:
    600          return 2;
    601       case OP_RCP:
    602       case OP_RSQ:
    603       case OP_LG2:
    604       case OP_SIN:
    605       case OP_COS:
    606       case OP_PRESIN:
    607       case OP_PREEX2:
    608       default:
    609          return 8;
    610       }
    611    } else
    612    if (i->dType == TYPE_U32 || i->dType == TYPE_S32) {
    613       switch (i->op) {
    614       case OP_ADD:
    615       case OP_AND:
    616       case OP_OR:
    617       case OP_XOR:
    618       case OP_NOT:
    619          return 1;
    620       case OP_MUL:
    621       case OP_MAD:
    622       case OP_CVT:
    623       case OP_SET:
    624       case OP_SLCT:
    625       case OP_SHL:
    626       case OP_SHR:
    627       case OP_NEG:
    628       case OP_ABS:
    629       case OP_MIN:
    630       case OP_MAX:
    631       default:
    632          return 2;
    633       }
    634    } else
    635    if (i->dType == TYPE_F64) {
    636       return 2;
    637    } else {
    638       return 1;
    639    }
    640 }
    641 
    642 bool TargetNVC0::canDualIssue(const Instruction *a, const Instruction *b) const
    643 {
    644    const OpClass clA = operationClass[a->op];
    645    const OpClass clB = operationClass[b->op];
    646 
    647    if (getChipset() >= 0xe4) {
    648       // not texturing
    649       // not if the 2nd instruction isn't necessarily executed
    650       if (clA == OPCLASS_TEXTURE || clA == OPCLASS_FLOW)
    651          return false;
    652       // anything with MOV
    653       if (a->op == OP_MOV || b->op == OP_MOV)
    654          return true;
    655       if (clA == clB) {
    656          // only F32 arith or integer additions
    657          if (clA != OPCLASS_ARITH)
    658             return false;
    659          return (a->dType == TYPE_F32 || a->op == OP_ADD ||
    660                  b->dType == TYPE_F32 || b->op == OP_ADD);
    661       }
    662       // nothing with TEXBAR
    663       if (a->op == OP_TEXBAR || b->op == OP_TEXBAR)
    664          return false;
    665       // no loads and stores accessing the the same space
    666       if ((clA == OPCLASS_LOAD && clB == OPCLASS_STORE) ||
    667           (clB == OPCLASS_LOAD && clA == OPCLASS_STORE))
    668          if (a->src(0).getFile() == b->src(0).getFile())
    669             return false;
    670       // no > 32-bit ops
    671       if (typeSizeof(a->dType) > 4 || typeSizeof(b->dType) > 4 ||
    672           typeSizeof(a->sType) > 4 || typeSizeof(b->sType) > 4)
    673          return false;
    674       return true;
    675    } else {
    676       return false; // info not needed (yet)
    677    }
    678 }
    679 
    680 } // namespace nv50_ir
    681