Home | History | Annotate | Download | only in codegen
      1 /*
      2  * Copyright 2011 Christoph Bumiller
      3  *           2014 Red Hat Inc.
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the "Software"),
      7  * to deal in the Software without restriction, including without limitation
      8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9  * and/or sell copies of the Software, and to permit persons to whom the
     10  * Software is furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice shall be included in
     13  * all copies or substantial portions of the Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     21  * OTHER DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #include "codegen/nv50_ir.h"
     25 #include "codegen/nv50_ir_build_util.h"
     26 
     27 #include "codegen/nv50_ir_target_nvc0.h"
     28 #include "codegen/nv50_ir_lowering_gm107.h"
     29 
     30 #include <limits>
     31 
     32 namespace nv50_ir {
     33 
     34 #define QOP_ADD  0
     35 #define QOP_SUBR 1
     36 #define QOP_SUB  2
     37 #define QOP_MOV2 3
     38 
     39 //             UL UR LL LR
     40 #define QUADOP(q, r, s, t)                      \
     41    ((QOP_##q << 6) | (QOP_##r << 4) |           \
     42     (QOP_##s << 2) | (QOP_##t << 0))
     43 
     44 void
     45 GM107LegalizeSSA::handlePFETCH(Instruction *i)
     46 {
     47    Value *src0;
     48 
     49    if (i->src(0).getFile() == FILE_GPR && !i->srcExists(1))
     50       return;
     51 
     52    bld.setPosition(i, false);
     53    src0 = bld.getSSA();
     54 
     55    if (i->srcExists(1))
     56       bld.mkOp2(OP_ADD , TYPE_U32, src0, i->getSrc(0), i->getSrc(1));
     57    else
     58       bld.mkOp1(OP_MOV , TYPE_U32, src0, i->getSrc(0));
     59 
     60    i->setSrc(0, src0);
     61    i->setSrc(1, NULL);
     62 }
     63 
     64 void
     65 GM107LegalizeSSA::handleLOAD(Instruction *i)
     66 {
     67    if (i->src(0).getFile() != FILE_MEMORY_CONST)
     68       return;
     69    if (i->src(0).isIndirect(0))
     70       return;
     71    if (typeSizeof(i->dType) != 4)
     72       return;
     73 
     74    i->op = OP_MOV;
     75 }
     76 
     77 bool
     78 GM107LegalizeSSA::visit(Instruction *i)
     79 {
     80    switch (i->op) {
     81    case OP_PFETCH:
     82       handlePFETCH(i);
     83       break;
     84    case OP_LOAD:
     85       handleLOAD(i);
     86       break;
     87    default:
     88       break;
     89    }
     90    return true;
     91 }
     92 
     93 bool
     94 GM107LoweringPass::handleManualTXD(TexInstruction *i)
     95 {
     96    static const uint8_t qOps[4][2] =
     97    {
     98       { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(MOV2, MOV2, ADD,  ADD) }, // l0
     99       { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD,  ADD) }, // l1
    100       { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
    101       { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
    102    };
    103    Value *def[4][4];
    104    Value *crd[3];
    105    Value *tmp;
    106    Instruction *tex, *add;
    107    Value *zero = bld.loadImm(bld.getSSA(), 0);
    108    int l, c;
    109    const int dim = i->tex.target.getDim() + i->tex.target.isCube();
    110    const int array = i->tex.target.isArray();
    111 
    112    i->op = OP_TEX; // no need to clone dPdx/dPdy later
    113 
    114    for (c = 0; c < dim; ++c)
    115       crd[c] = bld.getScratch();
    116    tmp = bld.getScratch();
    117 
    118    for (l = 0; l < 4; ++l) {
    119       Value *src[3], *val;
    120       // mov coordinates from lane l to all lanes
    121       bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
    122       for (c = 0; c < dim; ++c) {
    123          bld.mkOp2(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), bld.mkImm(l));
    124          add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero);
    125          add->subOp = 0x00;
    126          add->lanes = 1; /* abused for .ndv */
    127       }
    128 
    129       // add dPdx from lane l to lanes dx
    130       for (c = 0; c < dim; ++c) {
    131          bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l));
    132          add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
    133          add->subOp = qOps[l][0];
    134          add->lanes = 1; /* abused for .ndv */
    135       }
    136 
    137       // add dPdy from lane l to lanes dy
    138       for (c = 0; c < dim; ++c) {
    139          bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l));
    140          add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
    141          add->subOp = qOps[l][1];
    142          add->lanes = 1; /* abused for .ndv */
    143       }
    144 
    145       // normalize cube coordinates if necessary
    146       if (i->tex.target.isCube()) {
    147          for (c = 0; c < 3; ++c)
    148             src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
    149          val = bld.getScratch();
    150          bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
    151          bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
    152          bld.mkOp1(OP_RCP, TYPE_F32, val, val);
    153          for (c = 0; c < 3; ++c)
    154             src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
    155       } else {
    156          for (c = 0; c < dim; ++c)
    157             src[c] = crd[c];
    158       }
    159 
    160       // texture
    161       bld.insert(tex = cloneForward(func, i));
    162       for (c = 0; c < dim; ++c)
    163          tex->setSrc(c + array, src[c]);
    164       bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
    165 
    166       // save results
    167       for (c = 0; i->defExists(c); ++c) {
    168          Instruction *mov;
    169          def[c][l] = bld.getSSA();
    170          mov = bld.mkMov(def[c][l], tex->getDef(c));
    171          mov->fixed = 1;
    172          mov->lanes = 1 << l;
    173       }
    174    }
    175 
    176    for (c = 0; i->defExists(c); ++c) {
    177       Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
    178       for (l = 0; l < 4; ++l)
    179          u->setSrc(l, def[c][l]);
    180    }
    181 
    182    i->bb->remove(i);
    183    return true;
    184 }
    185 
    186 bool
    187 GM107LoweringPass::handleDFDX(Instruction *insn)
    188 {
    189    Instruction *shfl;
    190    int qop = 0, xid = 0;
    191 
    192    switch (insn->op) {
    193    case OP_DFDX:
    194       qop = QUADOP(SUB, SUBR, SUB, SUBR);
    195       xid = 1;
    196       break;
    197    case OP_DFDY:
    198       qop = QUADOP(SUB, SUB, SUBR, SUBR);
    199       xid = 2;
    200       break;
    201    default:
    202       assert(!"invalid dfdx opcode");
    203       break;
    204    }
    205 
    206    shfl = bld.mkOp2(OP_SHFL, TYPE_F32, bld.getScratch(),
    207                     insn->getSrc(0), bld.mkImm(xid));
    208    shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY;
    209    insn->op = OP_QUADOP;
    210    insn->subOp = qop;
    211    insn->lanes = 0; /* abused for !.ndv */
    212    insn->setSrc(1, insn->getSrc(0));
    213    insn->setSrc(0, shfl->getDef(0));
    214    return true;
    215 }
    216 
    217 bool
    218 GM107LoweringPass::handlePFETCH(Instruction *i)
    219 {
    220    Value *tmp0 = bld.getScratch();
    221    Value *tmp1 = bld.getScratch();
    222    Value *tmp2 = bld.getScratch();
    223    bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0));
    224    bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16));
    225    bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff));
    226    bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff));
    227    if (i->getSrc(1))
    228       bld.mkOp2(OP_ADD , TYPE_U32, tmp2, i->getSrc(0), i->getSrc(1));
    229    else
    230       bld.mkOp1(OP_MOV , TYPE_U32, tmp2, i->getSrc(0));
    231    bld.mkOp3(OP_MAD , TYPE_U32, tmp0, tmp0, tmp1, tmp2);
    232    i->setSrc(0, tmp0);
    233    i->setSrc(1, NULL);
    234    return true;
    235 }
    236 
    237 bool
    238 GM107LoweringPass::handlePOPCNT(Instruction *i)
    239 {
    240    Value *tmp = bld.mkOp2v(OP_AND, i->sType, bld.getScratch(),
    241                            i->getSrc(0), i->getSrc(1));
    242    i->setSrc(0, tmp);
    243    i->setSrc(1, NULL);
    244    return true;
    245 }
    246 
    247 //
    248 // - add quadop dance for texturing
    249 // - put FP outputs in GPRs
    250 // - convert instruction sequences
    251 //
    252 bool
    253 GM107LoweringPass::visit(Instruction *i)
    254 {
    255    bld.setPosition(i, false);
    256 
    257    if (i->cc != CC_ALWAYS)
    258       checkPredicate(i);
    259 
    260    switch (i->op) {
    261    case OP_PFETCH:
    262       return handlePFETCH(i);
    263    case OP_DFDX:
    264    case OP_DFDY:
    265       return handleDFDX(i);
    266    case OP_POPCNT:
    267       return handlePOPCNT(i);
    268    default:
    269       return NVC0LoweringPass::visit(i);
    270    }
    271 }
    272 
    273 } // namespace nv50_ir
    274