Home | History | Annotate | Download | only in codegen
      1 /*
      2  * Copyright 2011 Christoph Bumiller
      3  *           2014 Red Hat Inc.
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the "Software"),
      7  * to deal in the Software without restriction, including without limitation
      8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9  * and/or sell copies of the Software, and to permit persons to whom the
     10  * Software is furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice shall be included in
     13  * all copies or substantial portions of the Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     21  * OTHER DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #include "codegen/nv50_ir.h"
     25 #include "codegen/nv50_ir_build_util.h"
     26 
     27 #include "codegen/nv50_ir_target_nvc0.h"
     28 #include "codegen/nv50_ir_lowering_gm107.h"
     29 
     30 #include <limits>
     31 
     32 namespace nv50_ir {
     33 
// Per-lane operation selectors for OP_QUADOP. Each one occupies a 2-bit
// field of the value built by QUADOP() below.
#define QOP_ADD  0
#define QOP_SUBR 1
#define QOP_SUB  2
#define QOP_MOV2 3

// Packs four QOP_* selectors (given without the QOP_ prefix) into one 8-bit
// value, first argument in the top two bits. The result is stored in the
// subOp field of OP_QUADOP instructions in this file.
//             UL UR LL LR
#define QUADOP(q, r, s, t)                      \
   ((QOP_##q << 6) | (QOP_##r << 4) |           \
    (QOP_##s << 2) | (QOP_##t << 0))

// Immediate passed as the last source of every OP_SHFL emitted in this file.
// NOTE(review): presumably confines the shuffle to the 4 lanes of a quad --
// confirm against the SHFL encoding.
#define SHFL_BOUND_QUAD 0x1c03
     45 
     46 void
     47 GM107LegalizeSSA::handlePFETCH(Instruction *i)
     48 {
     49    Value *src0;
     50 
     51    if (i->src(0).getFile() == FILE_GPR && !i->srcExists(1))
     52       return;
     53 
     54    bld.setPosition(i, false);
     55    src0 = bld.getSSA();
     56 
     57    if (i->srcExists(1))
     58       bld.mkOp2(OP_ADD , TYPE_U32, src0, i->getSrc(0), i->getSrc(1));
     59    else
     60       bld.mkOp1(OP_MOV , TYPE_U32, src0, i->getSrc(0));
     61 
     62    i->setSrc(0, src0);
     63    i->setSrc(1, NULL);
     64 }
     65 
     66 void
     67 GM107LegalizeSSA::handleLOAD(Instruction *i)
     68 {
     69    if (i->src(0).getFile() != FILE_MEMORY_CONST)
     70       return;
     71    if (i->src(0).isIndirect(0))
     72       return;
     73    if (typeSizeof(i->dType) != 4)
     74       return;
     75 
     76    i->op = OP_MOV;
     77 }
     78 
     79 bool
     80 GM107LegalizeSSA::visit(Instruction *i)
     81 {
     82    switch (i->op) {
     83    case OP_PFETCH:
     84       handlePFETCH(i);
     85       break;
     86    case OP_LOAD:
     87       handleLOAD(i);
     88       break;
     89    default:
     90       break;
     91    }
     92    return true;
     93 }
     94 
     95 bool
     96 GM107LoweringPass::handleManualTXD(TexInstruction *i)
     97 {
     98    // See NVC0LoweringPass::handleManualTXD for rationale. This function
     99    // implements the same logic, but using SM50-friendly primitives.
    100    static const uint8_t qOps[2] =
    101       { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(MOV2, MOV2, ADD,  ADD) };
    102    Value *def[4][4];
    103    Value *crd[3], *arr, *shadow;
    104    Value *tmp;
    105    Instruction *tex, *add;
    106    Value *quad = bld.mkImm(SHFL_BOUND_QUAD);
    107    int l, c;
    108    const int dim = i->tex.target.getDim() + i->tex.target.isCube();
    109    const int array = i->tex.target.isArray();
    110    const int indirect = i->tex.rIndirectSrc >= 0;
    111 
    112    i->op = OP_TEX; // no need to clone dPdx/dPdy later
    113 
    114    for (c = 0; c < dim; ++c)
    115       crd[c] = bld.getScratch();
    116    arr = bld.getScratch();
    117    shadow = bld.getScratch();
    118    tmp = bld.getScratch();
    119 
    120    for (l = 0; l < 4; ++l) {
    121       Value *src[3], *val;
    122       Value *lane = bld.mkImm(l);
    123       bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
    124       // Make sure lane 0 has the appropriate array/depth compare values
    125       if (l != 0) {
    126          if (array)
    127             bld.mkOp3(OP_SHFL, TYPE_F32, arr, i->getSrc(0), lane, quad);
    128          if (i->tex.target.isShadow())
    129             bld.mkOp3(OP_SHFL, TYPE_F32, shadow, i->getSrc(array + dim + indirect), lane, quad);
    130       }
    131 
    132       // mov coordinates from lane l to all lanes
    133       for (c = 0; c < dim; ++c) {
    134          bld.mkOp3(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), lane, quad);
    135       }
    136 
    137       // add dPdx from lane l to lanes dx
    138       for (c = 0; c < dim; ++c) {
    139          bld.mkOp3(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), lane, quad);
    140          add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
    141          add->subOp = qOps[0];
    142          add->lanes = 1; /* abused for .ndv */
    143       }
    144 
    145       // add dPdy from lane l to lanes dy
    146       for (c = 0; c < dim; ++c) {
    147          bld.mkOp3(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), lane, quad);
    148          add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
    149          add->subOp = qOps[1];
    150          add->lanes = 1; /* abused for .ndv */
    151       }
    152 
    153       // normalize cube coordinates if necessary
    154       if (i->tex.target.isCube()) {
    155          for (c = 0; c < 3; ++c)
    156             src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
    157          val = bld.getScratch();
    158          bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
    159          bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
    160          bld.mkOp1(OP_RCP, TYPE_F32, val, val);
    161          for (c = 0; c < 3; ++c)
    162             src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
    163       } else {
    164          for (c = 0; c < dim; ++c)
    165             src[c] = crd[c];
    166       }
    167 
    168       // texture
    169       bld.insert(tex = cloneForward(func, i));
    170       if (l != 0) {
    171          if (array)
    172             tex->setSrc(0, arr);
    173          if (i->tex.target.isShadow())
    174             tex->setSrc(array + dim + indirect, shadow);
    175       }
    176       for (c = 0; c < dim; ++c)
    177          tex->setSrc(c + array, src[c]);
    178       // broadcast results from lane 0 to all lanes
    179       if (l != 0)
    180          for (c = 0; i->defExists(c); ++c)
    181             bld.mkOp3(OP_SHFL, TYPE_F32, tex->getDef(c), tex->getDef(c), bld.mkImm(0), quad);
    182       bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
    183 
    184       // save results
    185       for (c = 0; i->defExists(c); ++c) {
    186          Instruction *mov;
    187          def[c][l] = bld.getSSA();
    188          mov = bld.mkMov(def[c][l], tex->getDef(c));
    189          mov->fixed = 1;
    190          mov->lanes = 1 << l;
    191       }
    192    }
    193 
    194    for (c = 0; i->defExists(c); ++c) {
    195       Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
    196       for (l = 0; l < 4; ++l)
    197          u->setSrc(l, def[c][l]);
    198    }
    199 
    200    i->bb->remove(i);
    201    return true;
    202 }
    203 
    204 bool
    205 GM107LoweringPass::handleDFDX(Instruction *insn)
    206 {
    207    Instruction *shfl;
    208    int qop = 0, xid = 0;
    209 
    210    switch (insn->op) {
    211    case OP_DFDX:
    212       qop = QUADOP(SUB, SUBR, SUB, SUBR);
    213       xid = 1;
    214       break;
    215    case OP_DFDY:
    216       qop = QUADOP(SUB, SUB, SUBR, SUBR);
    217       xid = 2;
    218       break;
    219    default:
    220       assert(!"invalid dfdx opcode");
    221       break;
    222    }
    223 
    224    shfl = bld.mkOp3(OP_SHFL, TYPE_F32, bld.getScratch(), insn->getSrc(0),
    225                     bld.mkImm(xid), bld.mkImm(SHFL_BOUND_QUAD));
    226    shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY;
    227    insn->op = OP_QUADOP;
    228    insn->subOp = qop;
    229    insn->lanes = 0; /* abused for !.ndv */
    230    insn->setSrc(1, insn->getSrc(0));
    231    insn->setSrc(0, shfl->getDef(0));
    232    return true;
    233 }
    234 
    235 bool
    236 GM107LoweringPass::handlePFETCH(Instruction *i)
    237 {
    238    Value *tmp0 = bld.getScratch();
    239    Value *tmp1 = bld.getScratch();
    240    Value *tmp2 = bld.getScratch();
    241    bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0));
    242    bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16));
    243    bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff));
    244    bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff));
    245    if (i->getSrc(1))
    246       bld.mkOp2(OP_ADD , TYPE_U32, tmp2, i->getSrc(0), i->getSrc(1));
    247    else
    248       bld.mkOp1(OP_MOV , TYPE_U32, tmp2, i->getSrc(0));
    249    bld.mkOp3(OP_MAD , TYPE_U32, tmp0, tmp0, tmp1, tmp2);
    250    i->setSrc(0, tmp0);
    251    i->setSrc(1, NULL);
    252    return true;
    253 }
    254 
    255 bool
    256 GM107LoweringPass::handlePOPCNT(Instruction *i)
    257 {
    258    Value *tmp = bld.mkOp2v(OP_AND, i->sType, bld.getScratch(),
    259                            i->getSrc(0), i->getSrc(1));
    260    i->setSrc(0, tmp);
    261    i->setSrc(1, NULL);
    262    return true;
    263 }
    264 
    265 //
    266 // - add quadop dance for texturing
    267 // - put FP outputs in GPRs
    268 // - convert instruction sequences
    269 //
    270 bool
    271 GM107LoweringPass::visit(Instruction *i)
    272 {
    273    bld.setPosition(i, false);
    274 
    275    if (i->cc != CC_ALWAYS)
    276       checkPredicate(i);
    277 
    278    switch (i->op) {
    279    case OP_PFETCH:
    280       return handlePFETCH(i);
    281    case OP_DFDX:
    282    case OP_DFDY:
    283       return handleDFDX(i);
    284    case OP_POPCNT:
    285       return handlePOPCNT(i);
    286    default:
    287       return NVC0LoweringPass::visit(i);
    288    }
    289 }
    290 
    291 } // namespace nv50_ir
    292