1 /* 2 * Copyright 2011 Christoph Bumiller 3 * 2014 Red Hat Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "codegen/nv50_ir.h" 25 #include "codegen/nv50_ir_build_util.h" 26 27 #include "codegen/nv50_ir_target_nvc0.h" 28 #include "codegen/nv50_ir_lowering_gm107.h" 29 30 #include <limits> 31 32 namespace nv50_ir { 33 34 #define QOP_ADD 0 35 #define QOP_SUBR 1 36 #define QOP_SUB 2 37 #define QOP_MOV2 3 38 39 // UL UR LL LR 40 #define QUADOP(q, r, s, t) \ 41 ((QOP_##q << 6) | (QOP_##r << 4) | \ 42 (QOP_##s << 2) | (QOP_##t << 0)) 43 44 void 45 GM107LegalizeSSA::handlePFETCH(Instruction *i) 46 { 47 Value *src0; 48 49 if (i->src(0).getFile() == FILE_GPR && !i->srcExists(1)) 50 return; 51 52 bld.setPosition(i, false); 53 src0 = bld.getSSA(); 54 55 if (i->srcExists(1)) 56 bld.mkOp2(OP_ADD , TYPE_U32, src0, i->getSrc(0), i->getSrc(1)); 57 else 58 bld.mkOp1(OP_MOV , TYPE_U32, src0, i->getSrc(0)); 59 60 i->setSrc(0, src0); 61 i->setSrc(1, NULL); 62 } 63 64 void 65 GM107LegalizeSSA::handleLOAD(Instruction *i) 66 { 67 if (i->src(0).getFile() != FILE_MEMORY_CONST) 68 return; 69 if (i->src(0).isIndirect(0)) 70 return; 71 if (typeSizeof(i->dType) != 4) 72 return; 73 74 i->op = OP_MOV; 75 } 76 77 bool 78 GM107LegalizeSSA::visit(Instruction *i) 79 { 80 switch (i->op) { 81 case OP_PFETCH: 82 handlePFETCH(i); 83 break; 84 case OP_LOAD: 85 handleLOAD(i); 86 break; 87 default: 88 break; 89 } 90 return true; 91 } 92 93 bool 94 GM107LoweringPass::handleManualTXD(TexInstruction *i) 95 { 96 static const uint8_t qOps[4][2] = 97 { 98 { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0 99 { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1 100 { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2 101 { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3 102 }; 103 Value *def[4][4]; 104 Value *crd[3]; 105 Value *tmp; 106 Instruction *tex, *add; 107 Value *zero = bld.loadImm(bld.getSSA(), 0); 108 int l, c; 109 const int dim = i->tex.target.getDim() + i->tex.target.isCube(); 110 const int array = i->tex.target.isArray(); 111 112 i->op = OP_TEX; // no need to clone dPdx/dPdy later 113 114 for (c = 0; c < dim; ++c) 115 crd[c] = bld.getScratch(); 116 tmp = bld.getScratch(); 117 118 for (l = 0; l < 4; ++l) { 119 Value *src[3], *val; 120 // mov coordinates from lane l to all lanes 121 bld.mkOp(OP_QUADON, TYPE_NONE, NULL); 122 for (c = 0; c < dim; ++c) { 123 bld.mkOp2(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), bld.mkImm(l)); 124 add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero); 125 add->subOp = 0x00; 126 add->lanes = 1; /* abused for .ndv */ 127 } 128 129 // add dPdx from lane l to lanes dx 130 for (c = 0; c < dim; ++c) { 131 bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l)); 132 add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]); 133 add->subOp = qOps[l][0]; 134 add->lanes = 1; /* abused for .ndv */ 135 } 136 137 // add dPdy from lane l to lanes dy 138 for (c = 0; c < dim; ++c) { 139 bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l)); 140 add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]); 141 add->subOp = qOps[l][1]; 142 add->lanes = 1; /* abused for .ndv */ 143 } 144 145 // normalize cube coordinates if necessary 146 if (i->tex.target.isCube()) { 147 for (c = 0; c < 3; ++c) 148 src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]); 149 val = bld.getScratch(); 150 bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); 151 bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val); 152 bld.mkOp1(OP_RCP, TYPE_F32, val, val); 153 for (c = 0; c < 3; ++c) 154 src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val); 155 } else { 156 for (c = 0; c < dim; ++c) 157 src[c] = crd[c]; 158 } 159 160 // texture 161 bld.insert(tex = cloneForward(func, i)); 162 for (c = 0; c < dim; ++c) 163 tex->setSrc(c + array, src[c]); 164 bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); 165 166 // save results 167 for (c = 0; i->defExists(c); ++c) { 168 Instruction *mov; 169 def[c][l] = bld.getSSA(); 170 mov = bld.mkMov(def[c][l], tex->getDef(c)); 171 mov->fixed = 1; 172 mov->lanes = 1 << l; 173 } 174 } 175 176 for (c = 0; i->defExists(c); ++c) { 177 Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c)); 178 for (l = 0; l < 4; ++l) 179 u->setSrc(l, def[c][l]); 180 } 181 182 i->bb->remove(i); 183 return true; 184 } 185 186 bool 187 GM107LoweringPass::handleDFDX(Instruction *insn) 188 { 189 Instruction *shfl; 190 int qop = 0, xid = 0; 191 192 switch (insn->op) { 193 case OP_DFDX: 194 qop = QUADOP(SUB, SUBR, SUB, SUBR); 195 xid = 1; 196 break; 197 case OP_DFDY: 198 qop = QUADOP(SUB, SUB, SUBR, SUBR); 199 xid = 2; 200 break; 201 default: 202 assert(!"invalid dfdx opcode"); 203 break; 204 } 205 206 shfl = bld.mkOp2(OP_SHFL, TYPE_F32, bld.getScratch(), 207 insn->getSrc(0), bld.mkImm(xid)); 208 shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY; 209 insn->op = OP_QUADOP; 210 insn->subOp = qop; 211 insn->lanes = 0; /* abused for !.ndv */ 212 insn->setSrc(1, insn->getSrc(0)); 213 insn->setSrc(0, shfl->getDef(0)); 214 return true; 215 } 216 217 bool 218 GM107LoweringPass::handlePFETCH(Instruction *i) 219 { 220 Value *tmp0 = bld.getScratch(); 221 Value *tmp1 = bld.getScratch(); 222 Value *tmp2 = bld.getScratch(); 223 bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0)); 224 bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16)); 225 bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff)); 226 bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff)); 227 if (i->getSrc(1)) 228 bld.mkOp2(OP_ADD , TYPE_U32, tmp2, i->getSrc(0), i->getSrc(1)); 229 else 230 bld.mkOp1(OP_MOV , TYPE_U32, tmp2, i->getSrc(0)); 231 bld.mkOp3(OP_MAD , TYPE_U32, tmp0, tmp0, tmp1, tmp2); 232 i->setSrc(0, tmp0); 233 i->setSrc(1, NULL); 234 return true; 235 } 236 237 bool 238 GM107LoweringPass::handlePOPCNT(Instruction *i) 239 { 240 Value *tmp = bld.mkOp2v(OP_AND, i->sType, bld.getScratch(), 241 i->getSrc(0), i->getSrc(1)); 242 i->setSrc(0, tmp); 243 i->setSrc(1, NULL); 244 return true; 245 } 246 247 // 248 // - add quadop dance for texturing 249 // - put FP outputs in GPRs 250 // - convert instruction sequences 251 // 252 bool 253 GM107LoweringPass::visit(Instruction *i) 254 { 255 bld.setPosition(i, false); 256 257 if (i->cc != CC_ALWAYS) 258 checkPredicate(i); 259 260 switch (i->op) { 261 case OP_PFETCH: 262 return handlePFETCH(i); 263 case OP_DFDX: 264 case OP_DFDY: 265 return handleDFDX(i); 266 case OP_POPCNT: 267 return handlePOPCNT(i); 268 default: 269 return NVC0LoweringPass::visit(i); 270 } 271 } 272 273 } // namespace nv50_ir 274