1 /* 2 * Copyright 2011 Christoph Bumiller 3 * 2014 Red Hat Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "codegen/nv50_ir_target_gm107.h" 25 #include "codegen/nv50_ir_lowering_gm107.h" 26 27 namespace nv50_ir { 28 29 Target *getTargetGM107(unsigned int chipset) 30 { 31 return new TargetGM107(chipset); 32 } 33 34 // BULTINS / LIBRARY FUNCTIONS: 35 36 // lazyness -> will just hardcode everything for the time being 37 38 #include "lib/gm107.asm.h" 39 40 void 41 TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const 42 { 43 *code = (const uint32_t *)&gm107_builtin_code[0]; 44 *size = sizeof(gm107_builtin_code); 45 } 46 47 uint32_t 48 TargetGM107::getBuiltinOffset(int builtin) const 49 { 50 assert(builtin < NVC0_BUILTIN_COUNT); 51 return gm107_builtin_offsets[builtin]; 52 } 53 54 bool 55 TargetGM107::isOpSupported(operation op, DataType ty) const 56 { 57 switch (op) { 58 case OP_SAD: 59 case OP_POW: 60 case OP_SQRT: 61 case OP_DIV: 62 case OP_MOD: 63 return false; 64 default: 65 break; 66 } 67 68 return true; 69 } 70 71 // Return true when an instruction supports the reuse flag. When supported, the 72 // hardware will use the operand reuse cache introduced since Maxwell, which 73 // should try to reduce bank conflicts by caching values for the subsequent 74 // instructions. Note that the next instructions have to use the same GPR id in 75 // the same operand slot. 76 bool 77 TargetGM107::isReuseSupported(const Instruction *insn) const 78 { 79 const OpClass cl = getOpClass(insn->op); 80 81 // TODO: double-check! 82 switch (cl) { 83 case OPCLASS_ARITH: 84 case OPCLASS_COMPARE: 85 case OPCLASS_LOGIC: 86 case OPCLASS_MOVE: 87 case OPCLASS_SHIFT: 88 return true; 89 case OPCLASS_BITFIELD: 90 if (insn->op == OP_INSBF || insn->op == OP_EXTBF) 91 return true; 92 break; 93 default: 94 break; 95 } 96 return false; 97 } 98 99 // Return true when an instruction requires to set up a barrier because it 100 // doesn't operate at a fixed latency. Variable latency instructions are memory 101 // operations, double precision operations, special function unit operations 102 // and other low throughput instructions. 103 bool 104 TargetGM107::isBarrierRequired(const Instruction *insn) const 105 { 106 const OpClass cl = getOpClass(insn->op); 107 108 if (insn->dType == TYPE_F64 || insn->sType == TYPE_F64) 109 return true; 110 111 switch (cl) { 112 case OPCLASS_ATOMIC: 113 case OPCLASS_LOAD: 114 case OPCLASS_STORE: 115 case OPCLASS_SURFACE: 116 case OPCLASS_TEXTURE: 117 return true; 118 case OPCLASS_SFU: 119 switch (insn->op) { 120 case OP_COS: 121 case OP_EX2: 122 case OP_LG2: 123 case OP_LINTERP: 124 case OP_PINTERP: 125 case OP_RCP: 126 case OP_RSQ: 127 case OP_SIN: 128 return true; 129 default: 130 break; 131 } 132 break; 133 case OPCLASS_BITFIELD: 134 switch (insn->op) { 135 case OP_BFIND: 136 case OP_POPCNT: 137 return true; 138 default: 139 break; 140 } 141 break; 142 case OPCLASS_CONTROL: 143 switch (insn->op) { 144 case OP_EMIT: 145 case OP_RESTART: 146 return true; 147 default: 148 break; 149 } 150 break; 151 case OPCLASS_OTHER: 152 switch (insn->op) { 153 case OP_AFETCH: 154 case OP_PFETCH: 155 case OP_PIXLD: 156 case OP_RDSV: 157 case OP_SHFL: 158 return true; 159 default: 160 break; 161 } 162 break; 163 case OPCLASS_ARITH: 164 // TODO: IMUL/IMAD require barriers too, use of XMAD instead! 165 if ((insn->op == OP_MUL || insn->op == OP_MAD) && 166 !isFloatType(insn->dType)) 167 return true; 168 break; 169 case OPCLASS_CONVERT: 170 if (insn->def(0).getFile() != FILE_PREDICATE && 171 insn->src(0).getFile() != FILE_PREDICATE) 172 return true; 173 break; 174 default: 175 break; 176 } 177 return false; 178 } 179 180 bool 181 TargetGM107::canDualIssue(const Instruction *a, const Instruction *b) const 182 { 183 // TODO 184 return false; 185 } 186 187 // Return the number of stall counts needed to complete a single instruction. 188 // On Maxwell GPUs, the pipeline depth is 6, but some instructions require 189 // different number of stall counts like memory operations. 190 int 191 TargetGM107::getLatency(const Instruction *insn) const 192 { 193 // TODO: better values! This should be good enough for now though. 194 switch (insn->op) { 195 case OP_EMIT: 196 case OP_EXPORT: 197 case OP_PIXLD: 198 case OP_RESTART: 199 case OP_STORE: 200 case OP_SUSTB: 201 case OP_SUSTP: 202 return 1; 203 case OP_SHFL: 204 return 2; 205 case OP_ADD: 206 case OP_AND: 207 case OP_EXTBF: 208 case OP_FMA: 209 case OP_INSBF: 210 case OP_MAD: 211 case OP_MAX: 212 case OP_MIN: 213 case OP_MOV: 214 case OP_MUL: 215 case OP_NOT: 216 case OP_OR: 217 case OP_PREEX2: 218 case OP_PRESIN: 219 case OP_QUADOP: 220 case OP_SELP: 221 case OP_SET: 222 case OP_SET_AND: 223 case OP_SET_OR: 224 case OP_SET_XOR: 225 case OP_SHL: 226 case OP_SHLADD: 227 case OP_SHR: 228 case OP_SLCT: 229 case OP_SUB: 230 case OP_VOTE: 231 case OP_XOR: 232 if (insn->dType != TYPE_F64) 233 return 6; 234 break; 235 case OP_ABS: 236 case OP_CEIL: 237 case OP_CVT: 238 case OP_FLOOR: 239 case OP_NEG: 240 case OP_SAT: 241 case OP_TRUNC: 242 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE || 243 insn->src(0).getFile() == FILE_PREDICATE)) 244 return 6; 245 break; 246 case OP_BFIND: 247 case OP_COS: 248 case OP_EX2: 249 case OP_LG2: 250 case OP_POPCNT: 251 case OP_QUADON: 252 case OP_QUADPOP: 253 case OP_RCP: 254 case OP_RSQ: 255 case OP_SIN: 256 return 13; 257 default: 258 break; 259 } 260 // Use the maximum number of stall counts for other instructions. 261 return 15; 262 } 263 264 // Return the operand read latency which is the number of stall counts before 265 // an instruction can read its sources. For memory operations like ATOM, LOAD 266 // and STORE, the memory access has to be indirect. 267 int 268 TargetGM107::getReadLatency(const Instruction *insn) const 269 { 270 switch (insn->op) { 271 case OP_ABS: 272 case OP_BFIND: 273 case OP_CEIL: 274 case OP_COS: 275 case OP_EX2: 276 case OP_FLOOR: 277 case OP_LG2: 278 case OP_NEG: 279 case OP_POPCNT: 280 case OP_RCP: 281 case OP_RSQ: 282 case OP_SAT: 283 case OP_SIN: 284 case OP_SULDB: 285 case OP_SULDP: 286 case OP_SUREDB: 287 case OP_SUREDP: 288 case OP_SUSTB: 289 case OP_SUSTP: 290 case OP_TRUNC: 291 return 4; 292 case OP_CVT: 293 if (insn->def(0).getFile() != FILE_PREDICATE && 294 insn->src(0).getFile() != FILE_PREDICATE) 295 return 4; 296 break; 297 case OP_ATOM: 298 case OP_LOAD: 299 case OP_STORE: 300 if (insn->src(0).isIndirect(0)) { 301 switch (insn->src(0).getFile()) { 302 case FILE_MEMORY_SHARED: 303 case FILE_MEMORY_CONST: 304 return 2; 305 case FILE_MEMORY_GLOBAL: 306 case FILE_MEMORY_LOCAL: 307 return 4; 308 default: 309 break; 310 } 311 } 312 break; 313 case OP_EXPORT: 314 case OP_PFETCH: 315 case OP_SHFL: 316 case OP_VFETCH: 317 return 2; 318 default: 319 break; 320 } 321 return 0; 322 } 323 324 bool 325 TargetGM107::runLegalizePass(Program *prog, CGStage stage) const 326 { 327 if (stage == CG_STAGE_PRE_SSA) { 328 GM107LoweringPass pass(prog); 329 return pass.run(prog, false, true); 330 } else 331 if (stage == CG_STAGE_POST_RA) { 332 NVC0LegalizePostRA pass(prog); 333 return pass.run(prog, false, true); 334 } else 335 if (stage == CG_STAGE_SSA) { 336 GM107LegalizeSSA pass; 337 return pass.run(prog, false, true); 338 } 339 return false; 340 } 341 342 CodeEmitter * 343 TargetGM107::getCodeEmitter(Program::Type type) 344 { 345 return createCodeEmitterGM107(type); 346 } 347 348 } // namespace nv50_ir 349