#include <float.h>
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_dynarray.h"
#include "util/u_inlines.h"
#include "util/u_debug.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_ureg.h"

#include "nouveau_debug.h"
#include "nv_object.xml.h"
#include "nv30/nv30-40_3d.xml.h"
#include "nv30/nvfx_shader.h"
#include "nv30/nv30_state.h"

/*
 * Per-compile translation context: state used while turning one TGSI
 * fragment shader into NV30/NV40 fragment-program instructions stored
 * in fp->insn (an array of 32-bit words, 4 words per instruction).
 */
struct nvfx_fpc {
   struct nv30_fragprog *fp;  /* the program object being filled in */

   unsigned max_temps;                 /* hw temp register limit (48 on nv4x, 32 on nv3x) */
   unsigned long long r_temps;         /* bitmask of currently-allocated hw temps */
   unsigned long long r_temps_discard; /* temps to be released after the current TGSI insn */
   struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS]; /* TGSI output index -> hw reg */
   struct nvfx_reg r_input[PIPE_MAX_SHADER_INPUTS];   /* TGSI input index -> hw input slot */
   struct nvfx_reg *r_temp;            /* TGSI temporary index -> hw temp reg */

   int num_regs;          /* highest temp/output register index used + 1 */

   unsigned inst_offset;  /* word offset of the instruction currently being emitted */
   unsigned have_const;   /* set once a 4-word const slot has been appended to it */
   unsigned is_nv4x;      /* nonzero (~0) on nv4x, 0 on nv3x */

   struct util_dynarray imm_data;  /* packed float[4] immediate values */

   struct nvfx_reg* r_imm;  /* TGSI immediate index -> NVFXSR_IMM register */
   unsigned nr_imm;

   struct util_dynarray if_stack;  /* word offsets of open IF instructions (nv4x only) */
   //struct util_dynarray loop_stack;
   struct util_dynarray label_relocs;  /* branch-target fixups patched after codegen */
};

/*
 * Allocate the lowest free hardware temp register and mark it for
 * automatic release at the end of the current TGSI instruction
 * (see release_temps()).  Returns temp 0 on exhaustion after logging.
 *
 * NOTE(review): ~r_temps is never 0 in practice (at most ~52 bits can be
 * set: max_temps <= 48 temps plus a few output regs), so __builtin_ctzll
 * is never called with a zero argument — confirm if limits ever change.
 */
static inline struct nvfx_reg
temp(struct nvfx_fpc *fpc)
{
   int idx = __builtin_ctzll(~fpc->r_temps);

   if (idx >= fpc->max_temps) {
      NOUVEAU_ERR("out of temps!!\n");
      return nvfx_reg(NVFXSR_TEMP, 0);
   }

   fpc->r_temps |= (1ULL << idx);
   fpc->r_temps_discard |= (1ULL << idx);
   return nvfx_reg(NVFXSR_TEMP, idx);
}

/* Free every temp allocated since the last call (per-TGSI-instruction scratch). */
static inline void
release_temps(struct nvfx_fpc *fpc)
{
   fpc->r_temps &= ~fpc->r_temps_discard;
   fpc->r_temps_discard = 0ULL;
}

/*
 * Append a float[4] immediate to the immediate pool and return an
 * NVFXSR_IMM register whose index identifies it (pool is 16 bytes per
 * entry, hence the >> 4).
 */
static inline struct nvfx_reg
nvfx_fp_imm(struct nvfx_fpc *fpc, float a, float b, float c, float d)
{
   float v[4] = {a, b, c, d};
   int idx = fpc->imm_data.size >> 4;

   memcpy(util_dynarray_grow(&fpc->imm_data, sizeof(float) * 4), v, 4 * sizeof(float));
   return nvfx_reg(NVFXSR_IMM, idx);
}

/*
 * Grow the instruction buffer by `size` 32-bit words.  realloc() may move
 * fp->insn, so callers must re-fetch any cached pointers into it afterwards.
 */
static void
grow_insns(struct nvfx_fpc *fpc, int size)
{
   struct nv30_fragprog *fp = fpc->fp;

   fp->insn_len += size;
   fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
}

/*
 * Encode source operand `pos` (0..2) of the instruction at fpc->inst_offset.
 * IMM and CONST sources get a 4-word constant slot appended directly after
 * the instruction (at most one per instruction — tracked by have_const);
 * IMM values are copied in now, CONST slots are zeroed and recorded in
 * fp->consts for later upload of the user constant.
 */
static void
emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_src src)
{
   struct nv30_fragprog *fp = fpc->fp;
   uint32_t *hw = &fp->insn[fpc->inst_offset];
   uint32_t sr = 0;

   switch (src.reg.type) {
   case NVFXSR_INPUT:
      sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
      hw[0] |= (src.reg.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
      break;
   case NVFXSR_OUTPUT:
      /* outputs are addressed as half-precision temps */
      sr |= NVFX_FP_REG_SRC_HALF;
      /* fall-through */
   case NVFXSR_TEMP:
      sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
      sr |= (src.reg.index << NVFX_FP_REG_SRC_SHIFT);
      break;
   case NVFXSR_IMM:
      if (!fpc->have_const) {
         grow_insns(fpc, 4);
         /* buffer may have moved */
         hw = &fp->insn[fpc->inst_offset];
         fpc->have_const = 1;
      }

      /* copy the immediate value into the const slot after the insn */
      memcpy(&fp->insn[fpc->inst_offset + 4],
             (float*)fpc->imm_data.data + src.reg.index * 4,
             sizeof(uint32_t) * 4);

      sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
      break;
   case NVFXSR_CONST:
      if (!fpc->have_const) {
         grow_insns(fpc, 4);
         hw = &fp->insn[fpc->inst_offset];
         fpc->have_const = 1;
      }

      {
         struct nv30_fragprog_data *fpd;

         /* record where this user constant must be patched in at upload time */
         fp->consts = realloc(fp->consts, ++fp->nr_consts *
                              sizeof(*fpd));
         fpd = &fp->consts[fp->nr_consts - 1];
         fpd->offset = fpc->inst_offset + 4;
         fpd->index = src.reg.index;
         memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
      }

      sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
      break;
   case NVFXSR_NONE:
      sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
      break;
   default:
      assert(0);
   }

   if (src.negate)
      sr |= NVFX_FP_REG_NEGATE;

   /* per-source absolute-value flags sit in bits 29..31 of word 1 */
   if (src.abs)
      hw[1] |= (1 << (29 + pos));

   sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) |
          (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) |
          (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) |
          (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));

   /* sources 0..2 occupy words 1..3 of the instruction */
   hw[pos + 1] |= sr;
}

/*
 * Encode the destination of the instruction at fpc->inst_offset and
 * track the highest register index used (num_regs).
 */
static void
emit_dst(struct nvfx_fpc *fpc, struct nvfx_reg dst)
{
   struct nv30_fragprog *fp = fpc->fp;
   uint32_t *hw = &fp->insn[fpc->inst_offset];

   switch (dst.type) {
   case NVFXSR_OUTPUT:
      if (dst.index == 1)
         /* output 1 is the depth result; these fp_control bits presumably
          * enable depth replacement — NOTE(review): confirm against the
          * NV30_3D_FP_CONTROL documentation */
         fp->fp_control |= 0x0000000e;
      else {
         /* color outputs are half-precision register pairs */
         hw[0] |= NVFX_FP_OP_OUT_REG_HALF;
         dst.index <<= 1;
      }
      /* fall-through */
   case NVFXSR_TEMP:
      if (fpc->num_regs < (dst.index + 1))
         fpc->num_regs = dst.index + 1;
      break;
   case NVFXSR_NONE:
      /* no destination write */
      hw[0] |= (1 << 30);
      break;
   default:
      assert(0);
   }

   hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);
}

/*
 * Append one 4-word instruction encoding `insn` (opcode, writemask,
 * saturate, condition-code test/update/swizzle, scale, texture unit,
 * destination and up to three sources) to the program.
 */
static void
nvfx_fp_emit(struct nvfx_fpc *fpc, struct nvfx_insn insn)
{
   struct nv30_fragprog *fp = fpc->fp;
   uint32_t *hw;

   fpc->inst_offset = fp->insn_len;
   fpc->have_const = 0;
   grow_insns(fpc, 4);
   hw = &fp->insn[fpc->inst_offset];
   memset(hw, 0, sizeof(uint32_t) * 4);

   if (insn.op == NVFX_FP_OP_OPCODE_KIL)
      fp->fp_control |= NV30_3D_FP_CONTROL_USES_KIL;
   hw[0] |= (insn.op << NVFX_FP_OP_OPCODE_SHIFT);
   hw[0] |= (insn.mask << NVFX_FP_OP_OUTMASK_SHIFT);
   hw[2] |= (insn.scale << NVFX_FP_OP_DST_SCALE_SHIFT);

   if (insn.sat)
      hw[0] |= NVFX_FP_OP_OUT_SAT;

   if (insn.cc_update)
      hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
   hw[1] |= (insn.cc_test << NVFX_FP_OP_COND_SHIFT);
   hw[1] |= ((insn.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
             (insn.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
             (insn.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
             (insn.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));

   if(insn.unit >= 0)
   {
      hw[0] |= (insn.unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
   }

   emit_dst(fpc, insn.dst);
   emit_src(fpc, 0, insn.src[0]);
   emit_src(fpc, 1, insn.src[1]);
   emit_src(fpc, 2, insn.src[2]);
}

/* Build an ALU insn (unit -1 = no texture unit). */
#define arith(s,o,d,m,s0,s1,s2) \
   nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, -1, \
             (d), (m), (s0), (s1), (s2))

/* Build a texture insn; note s1/s2 are accepted but ignored — texture ops
 * take a single source, and `none` must be in scope at the call site. */
#define tex(s,o,u,d,m,s0,s1,s2) \
   nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, (u), \
             (d), (m), (s0), none, none)

/* IF src.x != 0, as TGSI specifies */
static void
nv40_fp_if(struct nvfx_fpc *fpc, struct nvfx_src src)
{
   /* First emit a cc-updating MOV of src, then the raw IF instruction that
    * tests NE on the .x condition code.  The else/endif offsets in words
    * 2/3 are patched later by the ELSE/ENDIF handlers via if_stack. */
   const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
   struct nvfx_insn insn = arith(0, MOV, none.reg, NVFX_FP_MASK_X, src, none, none);
   uint32_t *hw;
   insn.cc_update = 1;
   nvfx_fp_emit(fpc, insn);

   fpc->inst_offset = fpc->fp->insn_len;
   grow_insns(fpc, 4);
   hw = &fpc->fp->insn[fpc->inst_offset];
   /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
   hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) |
           NV40_FP_OP_OUT_NONE |
           (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
   /* Use .xxxx swizzle so that we check only src[0].x*/
   hw[1] = (0 << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
           (0 << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
           (0 << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
           (0 << NVFX_FP_OP_COND_SWZ_W_SHIFT) |
           (NVFX_FP_OP_COND_NE << NVFX_FP_OP_COND_SHIFT);
   hw[2] = 0; /* | NV40_FP_OP_OPCODE_IS_BRANCH | else_offset */
   hw[3] = 0; /* | endif_offset */
   util_dynarray_append(&fpc->if_stack, unsigned, fpc->inst_offset);
}

/* Emit an unconditional subroutine call; the call offset in word 2 is
 * resolved later through label_relocs. */
static void
nv40_fp_cal(struct nvfx_fpc *fpc, unsigned target)
{
   struct nvfx_relocation reloc;
   uint32_t *hw;
   fpc->inst_offset = fpc->fp->insn_len;
   grow_insns(fpc, 4);
   hw = &fpc->fp->insn[fpc->inst_offset];
   /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
   hw[0] = (NV40_FP_OP_BRA_OPCODE_CAL << NVFX_FP_OP_OPCODE_SHIFT);
   /* Use .xxxx swizzle so that we check only src[0].x*/
   hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
           (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
   hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */
   hw[3] = 0;
   reloc.target = target;
   reloc.location = fpc->inst_offset + 2;
   util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
}

/* Emit an unconditional subroutine return. */
static void
nv40_fp_ret(struct nvfx_fpc *fpc)
{
   uint32_t *hw;
   fpc->inst_offset = fpc->fp->insn_len;
   grow_insns(fpc, 4);
   hw = &fpc->fp->insn[fpc->inst_offset];
   /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
   hw[0] = (NV40_FP_OP_BRA_OPCODE_RET << NVFX_FP_OP_OPCODE_SHIFT);
   /* Use .xxxx swizzle so that we check only src[0].x*/
   hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
           (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
   hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */
   hw[3] = 0;
}

/* Emit a REP (hardware loop) repeating `count` times; the loop-end offset
 * in word 3 is resolved later through label_relocs. */
static void
nv40_fp_rep(struct nvfx_fpc *fpc, unsigned count, unsigned target)
{
   struct nvfx_relocation reloc;
   uint32_t *hw;
   fpc->inst_offset = fpc->fp->insn_len;
   grow_insns(fpc, 4);
   hw = &fpc->fp->insn[fpc->inst_offset];
   /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
   hw[0] = (NV40_FP_OP_BRA_OPCODE_REP << NVFX_FP_OP_OPCODE_SHIFT) |
           NV40_FP_OP_OUT_NONE |
           (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
   /* Use .xxxx swizzle so that we check only src[0].x*/
   hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
           (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
   /* the same count is written to all three count fields — NOTE(review):
    * presumably they correspond to nesting levels; confirm in hw docs */
   hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH |
           (count << NV40_FP_OP_REP_COUNT1_SHIFT) |
           (count << NV40_FP_OP_REP_COUNT2_SHIFT) |
           (count << NV40_FP_OP_REP_COUNT3_SHIFT);
   hw[3] = 0; /* | end_offset */
   reloc.target = target;
   reloc.location = fpc->inst_offset + 3;
   util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
   //util_dynarray_append(&fpc->loop_stack, unsigned, target);
}

#if 0
/* documentation only */
/* warning: this only works forward, and probably only if not inside any IF */
static void
nv40_fp_bra(struct nvfx_fpc *fpc, unsigned target)
{
   struct nvfx_relocation reloc;
   uint32_t *hw;
   fpc->inst_offset = fpc->fp->insn_len;
   grow_insns(fpc, 4);
   hw = &fpc->fp->insn[fpc->inst_offset];
   /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
   hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) |
           NV40_FP_OP_OUT_NONE |
           (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
   /* Use .xxxx swizzle so that we check only src[0].x*/
   hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
           (NVFX_FP_OP_COND_FL << NVFX_FP_OP_COND_SHIFT);
   hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | else_offset */
   hw[3] = 0; /* | endif_offset */
   reloc.target = target;
   reloc.location = fpc->inst_offset + 2;
   util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
   reloc.target = target;
   reloc.location = fpc->inst_offset + 3;
   util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
}
#endif

/* Emit an unconditional loop break. */
static void
nv40_fp_brk(struct nvfx_fpc *fpc)
{
   uint32_t *hw;
   fpc->inst_offset = fpc->fp->insn_len;
   grow_insns(fpc, 4);
   hw = &fpc->fp->insn[fpc->inst_offset];
   /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
   hw[0] = (NV40_FP_OP_BRA_OPCODE_BRK << NVFX_FP_OP_OPCODE_SHIFT) |
           NV40_FP_OP_OUT_NONE;
   /* Use .xxxx swizzle so that we check only src[0].x*/
   hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
           (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
   hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH;
   hw[3] = 0;
}

/* Map a TGSI source operand to an nvfx_src (register, swizzle, abs/neg). */
static inline struct nvfx_src
tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
{
   struct nvfx_src src;

   switch (fsrc->Register.File) {
   case TGSI_FILE_INPUT:
      src.reg = fpc->r_input[fsrc->Register.Index];
      break;
   case TGSI_FILE_CONSTANT:
      src.reg = nvfx_reg(NVFXSR_CONST, fsrc->Register.Index);
      break;
   case TGSI_FILE_IMMEDIATE:
      assert(fsrc->Register.Index < fpc->nr_imm);
      src.reg = fpc->r_imm[fsrc->Register.Index];
      break;
   case TGSI_FILE_TEMPORARY:
      src.reg = fpc->r_temp[fsrc->Register.Index];
      break;
   /* NV40 fragprog result regs are just temps, so this is simple */
   case TGSI_FILE_OUTPUT:
      src.reg = fpc->r_result[fsrc->Register.Index];
      break;
   default:
      NOUVEAU_ERR("bad src file\n");
      src.reg.index = 0;
      src.reg.type = 0;
      break;
   }

   src.abs = fsrc->Register.Absolute;
   src.negate = fsrc->Register.Negate;
   src.swz[0] = fsrc->Register.SwizzleX;
   src.swz[1] = fsrc->Register.SwizzleY;
   src.swz[2] = fsrc->Register.SwizzleZ;
   src.swz[3] = fsrc->Register.SwizzleW;
   src.indirect = 0;
   src.indirect_reg = 0;
   src.indirect_swz = 0;
   return src;
}

/* Map a TGSI destination operand to an nvfx_reg. */
static inline struct nvfx_reg
tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
   switch (fdst->Register.File) {
   case TGSI_FILE_OUTPUT:
      return fpc->r_result[fdst->Register.Index];
   case TGSI_FILE_TEMPORARY:
      return fpc->r_temp[fdst->Register.Index];
   case TGSI_FILE_NULL:
      return nvfx_reg(NVFXSR_NONE, 0);
   default:
      NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
      return
             nvfx_reg(NVFXSR_NONE, 0);
   }
}

/* Convert a TGSI writemask to the hardware outmask bits. */
static inline int
tgsi_mask(uint tgsi)
{
   int mask = 0;

   if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X;
   if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y;
   if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z;
   if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W;
   return mask;
}

/*
 * Translate one TGSI instruction into one or more hardware instructions.
 * Returns false on untranslatable input.  Scratch temps allocated during
 * translation are released at the end (release_temps).
 *
 * Source legalization: a single hw instruction can reference at most one
 * distinct input register (ai) and at most one constant-or-immediate slot
 * (ci/ii), so any additional distinct input/const/immediate source is
 * first copied into a temp via MOV.
 */
static bool
nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
                                const struct tgsi_full_instruction *finst)
{
   const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
   struct nvfx_insn insn;
   struct nvfx_src src[3], tmp;
   struct nvfx_reg dst;
   int mask, sat, unit = 0;
   int ai = -1, ci = -1, ii = -1;
   int i;

   if (finst->Instruction.Opcode == TGSI_OPCODE_END)
      return true;

   /* resolve temporary-file sources first; they need no legalization */
   for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
      const struct tgsi_full_src_register *fsrc;

      fsrc = &finst->Src[i];
      if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
         src[i] = tgsi_src(fpc, fsrc);
      }
   }

   for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
      const struct tgsi_full_src_register *fsrc;

      fsrc = &finst->Src[i];

      switch (fsrc->Register.File) {
      case TGSI_FILE_INPUT:
         if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG && (0
               || fsrc->Register.SwizzleX == PIPE_SWIZZLE_W
               || fsrc->Register.SwizzleY == PIPE_SWIZZLE_W
               || fsrc->Register.SwizzleZ == PIPE_SWIZZLE_W
               || fsrc->Register.SwizzleW == PIPE_SWIZZLE_W
               )) {
            /* hardware puts 0 in fogcoord.w, but GL/Gallium want 1 there */
            struct nvfx_src addend = nvfx_src(nvfx_fp_imm(fpc, 0, 0, 0, 1));
            addend.swz[0] = fsrc->Register.SwizzleX;
            addend.swz[1] = fsrc->Register.SwizzleY;
            addend.swz[2] = fsrc->Register.SwizzleZ;
            addend.swz[3] = fsrc->Register.SwizzleW;
            src[i] = nvfx_src(temp(fpc));
            nvfx_fp_emit(fpc, arith(0, ADD, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), addend, none));
         } else if (ai == -1 || ai == fsrc->Register.Index) {
            ai = fsrc->Register.Index;
            src[i] = tgsi_src(fpc, fsrc);
         } else {
            /* second distinct input: copy through a temp */
            src[i] = nvfx_src(temp(fpc));
            nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
         }
         break;
      case TGSI_FILE_CONSTANT:
         if ((ci == -1 && ii == -1) ||
             ci == fsrc->Register.Index) {
            ci = fsrc->Register.Index;
            src[i] = tgsi_src(fpc, fsrc);
         } else {
            /* const slot already taken: copy through a temp */
            src[i] = nvfx_src(temp(fpc));
            nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
         }
         break;
      case TGSI_FILE_IMMEDIATE:
         if ((ci == -1 && ii == -1) ||
             ii == fsrc->Register.Index) {
            ii = fsrc->Register.Index;
            src[i] = tgsi_src(fpc, fsrc);
         } else {
            /* const slot already taken: copy through a temp */
            src[i] = nvfx_src(temp(fpc));
            nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
         }
         break;
      case TGSI_FILE_TEMPORARY:
         /* handled above */
         break;
      case TGSI_FILE_SAMPLER:
         unit = fsrc->Register.Index;
         break;
      case TGSI_FILE_OUTPUT:
         break;
      default:
         NOUVEAU_ERR("bad src file\n");
         return false;
      }
   }

   dst = tgsi_dst(fpc, &finst->Dst[0]);
   mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
   sat = finst->Instruction.Saturate;

   switch (finst->Instruction.Opcode) {
   case TGSI_OPCODE_ADD:
      nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_CEIL:
      /* ceil(x) = -floor(-x) */
      tmp = nvfx_src(temp(fpc));
      nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, neg(src[0]), none, none));
      nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, neg(tmp), none, none));
      break;
   case TGSI_OPCODE_CMP:
      /* set condition codes from src0, then select src2 (>= 0) or src1 (< 0) */
      insn = arith(0, MOV, none.reg, mask, src[0], none, none);
      insn.cc_update = 1;
      nvfx_fp_emit(fpc, insn);

      insn = arith(sat, MOV, dst, mask, src[2], none, none);
      insn.cc_test = NVFX_COND_GE;
      nvfx_fp_emit(fpc, insn);

      insn = arith(sat, MOV, dst, mask, src[1], none, none);
      insn.cc_test = NVFX_COND_LT;
      nvfx_fp_emit(fpc, insn);
      break;
   case TGSI_OPCODE_COS:
      nvfx_fp_emit(fpc, arith(sat, COS, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_DDX:
      if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
         /* hw DDX only writes xy: compute zw in a second pass via swizzles */
         tmp = nvfx_src(temp(fpc));
         nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
         nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
         nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
         nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
      } else {
         nvfx_fp_emit(fpc, arith(sat, DDX, dst, mask, src[0], none, none));
      }
      break;
   case TGSI_OPCODE_DDY:
      if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
         /* same zw splitting as DDX */
         tmp = nvfx_src(temp(fpc));
         nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
         nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
         nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
         nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
      } else {
         nvfx_fp_emit(fpc, arith(sat, DDY, dst, mask, src[0], none, none));
      }
      break;
   case TGSI_OPCODE_DP2:
      /* dp2 = x0*x1 + y0*y1, built from MUL + ADD */
      tmp = nvfx_src(temp(fpc));
      nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], src[1], none));
      nvfx_fp_emit(fpc, arith(0, ADD, dst, mask, swz(tmp, X, X, X, X), swz(tmp, Y, Y, Y, Y), none));
      break;
   case TGSI_OPCODE_DP3:
      nvfx_fp_emit(fpc, arith(sat, DP3, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_DP4:
      nvfx_fp_emit(fpc, arith(sat, DP4, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_DPH:
      /* dph = dp3(src0, src1) + src1.w */
      tmp = nvfx_src(temp(fpc));
      nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[1], none));
      nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, swz(tmp, X, X, X, X), swz(src[1], W, W, W, W), none));
      break;
   case TGSI_OPCODE_DST:
      nvfx_fp_emit(fpc, arith(sat, DST, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_EX2:
      nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_FLR:
      nvfx_fp_emit(fpc, arith(sat, FLR, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_FRC:
      nvfx_fp_emit(fpc, arith(sat, FRC, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_KILL:
      nvfx_fp_emit(fpc, arith(0, KIL, none.reg, 0, none, none, none));
      break;
   case TGSI_OPCODE_KILL_IF:
      /* kill if any component of src0 is negative */
      insn = arith(0, MOV, none.reg, NVFX_FP_MASK_ALL, src[0], none, none);
      insn.cc_update = 1;
      nvfx_fp_emit(fpc, insn);

      insn = arith(0, KIL, none.reg, 0, none, none, none);
      insn.cc_test = NVFX_COND_LT;
      nvfx_fp_emit(fpc, insn);
      break;
   case TGSI_OPCODE_LG2:
      nvfx_fp_emit(fpc, arith(sat, LG2, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_LIT:
      if(!fpc->is_nv4x)
         nvfx_fp_emit(fpc, arith(sat, LIT_NV30, dst, mask, src[0], none, none));
      else {
         /* we use FLT_MIN, so that log2 never gives -infinity, and thus multiplication by
          * specular 0 always gives 0, so that ex2 gives 1, to satisfy the 0^0 = 1 requirement
          *
          * NOTE: if we start using half precision, we might need an fp16 FLT_MIN here instead
          */
         struct nvfx_src maxs = nvfx_src(nvfx_fp_imm(fpc, 0, FLT_MIN, 0, 0));
         tmp = nvfx_src(temp(fpc));
         if (ci>= 0 || ii >= 0) {
            /* const slot already used by src[0]: stage the immediate in a temp */
            nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, maxs, none, none));
            maxs = tmp;
         }
         nvfx_fp_emit(fpc, arith(0, MAX, tmp.reg, NVFX_FP_MASK_Y | NVFX_FP_MASK_W, swz(src[0], X, X, X, Y), swz(maxs, X, X, Y, Y), none));
         nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), none, none));
         nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), swz(src[0], W, W, W, W), none));
         nvfx_fp_emit(fpc, arith(sat, LITEX2_NV40, dst, mask, swz(tmp, Y, Y, W, W), none, none));
      }
      break;
   case TGSI_OPCODE_LRP:
      if(!fpc->is_nv4x)
         nvfx_fp_emit(fpc, arith(sat, LRP_NV30, dst, mask, src[0], src[1], src[2]));
      else {
         /* lrp = src0*src1 + (1-src0)*src2, built from two MADs */
         tmp = nvfx_src(temp(fpc));
         nvfx_fp_emit(fpc, arith(0, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2]));
         nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], tmp));
      }
      break;
   case TGSI_OPCODE_MAD:
      nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], src[2]));
      break;
   case TGSI_OPCODE_MAX:
      nvfx_fp_emit(fpc, arith(sat, MAX, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_MIN:
      nvfx_fp_emit(fpc, arith(sat, MIN, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_MOV:
      nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_MUL:
      nvfx_fp_emit(fpc, arith(sat, MUL, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_NOP:
      break;
   case TGSI_OPCODE_POW:
      if(!fpc->is_nv4x)
         nvfx_fp_emit(fpc, arith(sat, POW_NV30, dst, mask, src[0], src[1], none));
      else {
         /* pow(x, y) = ex2(y * lg2(x)) */
         tmp = nvfx_src(temp(fpc));
         nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
         nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none));
         nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, swz(tmp, X, X, X, X), none, none));
      }
      break;
   case TGSI_OPCODE_RCP:
      nvfx_fp_emit(fpc, arith(sat, RCP, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_RSQ:
      if(!fpc->is_nv4x)
         nvfx_fp_emit(fpc, arith(sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none));
      else {
         /* rsq(x) = ex2(-lg2(|x|) / 2), using the hw 1/2 dst scale */
         tmp = nvfx_src(temp(fpc));
         insn = arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, abs(swz(src[0], X, X, X, X)), none, none);
         insn.scale = NVFX_FP_OP_DST_SCALE_INV_2X;
         nvfx_fp_emit(fpc, insn);
         nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none));
      }
      break;
   case TGSI_OPCODE_SCS:
      /* avoid overwriting the source */
      if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X)
      {
         if (mask & NVFX_FP_MASK_X)
            nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
         if (mask & NVFX_FP_MASK_Y)
            nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
      }
      else
      {
         if (mask & NVFX_FP_MASK_Y)
            nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
         if (mask & NVFX_FP_MASK_X)
            nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
      }
      break;
   case TGSI_OPCODE_SEQ:
      nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SGE:
      nvfx_fp_emit(fpc, arith(sat, SGE, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SGT:
      nvfx_fp_emit(fpc, arith(sat, SGT, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SIN:
      nvfx_fp_emit(fpc, arith(sat, SIN, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_SLE:
      nvfx_fp_emit(fpc, arith(sat, SLE, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SLT:
      nvfx_fp_emit(fpc, arith(sat, SLT, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SNE:
      nvfx_fp_emit(fpc, arith(sat, SNE, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SSG:
   {
      /* sign: copy src0 (setting cc), force 1 where > 0, -1 where < 0;
       * the -1 write is skipped when saturating (result clamps to 0) */
      struct nvfx_src minones = swz(nvfx_src(nvfx_fp_imm(fpc, -1, -1, -1, -1)), X, X, X, X);

      insn = arith(sat, MOV, dst, mask, src[0], none, none);
      insn.cc_update = 1;
      nvfx_fp_emit(fpc, insn);

      insn = arith(0, STR, dst, mask, none, none, none);
      insn.cc_test = NVFX_COND_GT;
      nvfx_fp_emit(fpc, insn);

      if(!sat) {
         insn = arith(0, MOV, dst, mask, minones, none, none);
         insn.cc_test = NVFX_COND_LT;
         nvfx_fp_emit(fpc, insn);
      }
      break;
   }
   case TGSI_OPCODE_TEX:
      nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_TRUNC:
      /* trunc(x) = floor(|x|) with the sign of x restored via cc */
      tmp = nvfx_src(temp(fpc));
      insn = arith(0, MOV, none.reg, mask, src[0], none, none);
      insn.cc_update = 1;
      nvfx_fp_emit(fpc, insn);

      nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, abs(src[0]), none, none));
      nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, tmp, none, none));

      insn = arith(sat, MOV, dst, mask, neg(tmp), none, none);
      insn.cc_test = NVFX_COND_LT;
      nvfx_fp_emit(fpc, insn);
      break;
   case TGSI_OPCODE_TXB:
      nvfx_fp_emit(fpc, tex(sat, TXB, unit, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_TXL:
      if(fpc->is_nv4x)
         nvfx_fp_emit(fpc, tex(sat, TXL_NV40, unit, dst, mask, src[0], none, none));
      else /* unsupported on nv30, use TEX and hope they like it */
         nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_TXP:
      nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_XPD:
      /* cross product via two swizzled multiplies (w is left unwritten) */
      tmp = nvfx_src(temp(fpc));
      nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
      nvfx_fp_emit(fpc, arith(sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
      break;

   case TGSI_OPCODE_IF:
      // MOVRC0 R31 (TR0.xyzw), R<src>:
      // IF (NE.xxxx) ELSE <else> END <end>
      if(!fpc->is_nv4x)
         goto nv3x_cflow;
      nv40_fp_if(fpc, src[0]);
      break;

   case TGSI_OPCODE_ELSE:
   {
      uint32_t *hw;
      if(!fpc->is_nv4x)
         goto nv3x_cflow;
      assert(util_dynarray_contains(&fpc->if_stack, unsigned));
      /* patch the open IF's else_offset to point here */
      hw = &fpc->fp->insn[util_dynarray_top(&fpc->if_stack, unsigned)];
      hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
      break;
   }

   case TGSI_OPCODE_ENDIF:
   {
      uint32_t *hw;
      if(!fpc->is_nv4x)
         goto nv3x_cflow;
      assert(util_dynarray_contains(&fpc->if_stack, unsigned));
      /* close the IF: set else_offset too when no ELSE was seen */
      hw = &fpc->fp->insn[util_dynarray_pop(&fpc->if_stack, unsigned)];
      if(!hw[2])
         hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
      hw[3] = fpc->fp->insn_len;
      break;
   }

   case TGSI_OPCODE_BGNSUB:
   case TGSI_OPCODE_ENDSUB:
      /* nothing to do here */
      break;

   case TGSI_OPCODE_CAL:
      if(!fpc->is_nv4x)
         goto nv3x_cflow;
      nv40_fp_cal(fpc, finst->Label.Label);
      break;

   case TGSI_OPCODE_RET:
      if(!fpc->is_nv4x)
         goto nv3x_cflow;
      nv40_fp_ret(fpc);
      break;

   case TGSI_OPCODE_BGNLOOP:
      if(!fpc->is_nv4x)
         goto nv3x_cflow;
      /* TODO: we should support using two nested REPs to allow a > 255 iteration count */
      nv40_fp_rep(fpc, 255, finst->Label.Label);
      break;

   case TGSI_OPCODE_ENDLOOP:
      break;

   case TGSI_OPCODE_BRK:
      if(!fpc->is_nv4x)
         goto nv3x_cflow;
      nv40_fp_brk(fpc);
      break;

   case TGSI_OPCODE_CONT:
   {
      static int warned = 0;
      if(!warned) {
         NOUVEAU_ERR("Sorry, the continue keyword is not implemented: ignoring it.\n");
         warned = 1;
      }
      break;
   }

   default:
      NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
      return false;
   }

out:
   release_temps(fpc);
   return true;
nv3x_cflow:
   /* nv3x has no control flow: warn once and drop the instruction */
   {
      static int warned = 0;
      if(!warned) {
         NOUVEAU_ERR(
               "Sorry, control flow instructions are not supported in hardware on nv3x: ignoring them\n"
               "If rendering is incorrect, try to disable GLSL support in the application.\n");
         warned = 1;
      }
   }
   goto out;
}

/*
 * Record the hardware input slot for a TGSI input declaration in
 * fpc->r_input.  GENERIC/PCOORD inputs are deferred to
 * nvfx_fragprog_assign_generic(), which hands out the remaining
 * texcoord slots in a second pass.
 */
static bool
nvfx_fragprog_parse_decl_input(struct nvfx_fpc *fpc,
                               const struct tgsi_full_declaration
*fdec) 886 { 887 unsigned idx = fdec->Range.First; 888 unsigned hw; 889 890 switch (fdec->Semantic.Name) { 891 case TGSI_SEMANTIC_POSITION: 892 hw = NVFX_FP_OP_INPUT_SRC_POSITION; 893 break; 894 case TGSI_SEMANTIC_COLOR: 895 hw = NVFX_FP_OP_INPUT_SRC_COL0 + fdec->Semantic.Index; 896 break; 897 case TGSI_SEMANTIC_FOG: 898 hw = NVFX_FP_OP_INPUT_SRC_FOGC; 899 break; 900 case TGSI_SEMANTIC_FACE: 901 hw = NV40_FP_OP_INPUT_SRC_FACING; 902 break; 903 case TGSI_SEMANTIC_TEXCOORD: 904 assert(fdec->Semantic.Index < 8); 905 fpc->fp->texcoord[fdec->Semantic.Index] = fdec->Semantic.Index; 906 fpc->fp->texcoords |= (1 << fdec->Semantic.Index); 907 fpc->fp->vp_or |= (0x00004000 << fdec->Semantic.Index); 908 hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.Index); 909 break; 910 case TGSI_SEMANTIC_GENERIC: 911 case TGSI_SEMANTIC_PCOORD: 912 /* will be assigned to remaining TC slots later */ 913 return true; 914 default: 915 assert(0); 916 return false; 917 } 918 919 fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw); 920 return true; 921 } 922 923 static bool 924 nvfx_fragprog_assign_generic(struct nvfx_fpc *fpc, 925 const struct tgsi_full_declaration *fdec) 926 { 927 unsigned num_texcoords = fpc->is_nv4x ? 
10 : 8; 928 unsigned idx = fdec->Range.First; 929 unsigned hw; 930 931 switch (fdec->Semantic.Name) { 932 case TGSI_SEMANTIC_GENERIC: 933 case TGSI_SEMANTIC_PCOORD: 934 for (hw = 0; hw < num_texcoords; hw++) { 935 if (fpc->fp->texcoord[hw] == 0xffff) { 936 if (hw <= 7) { 937 fpc->fp->texcoords |= (0x1 << hw); 938 fpc->fp->vp_or |= (0x00004000 << hw); 939 } else { 940 fpc->fp->vp_or |= (0x00001000 << (hw - 8)); 941 } 942 if (fdec->Semantic.Name == TGSI_SEMANTIC_PCOORD) { 943 fpc->fp->texcoord[hw] = 0xfffe; 944 fpc->fp->point_sprite_control |= (0x00000100 << hw); 945 } else { 946 fpc->fp->texcoord[hw] = fdec->Semantic.Index + 8; 947 } 948 hw = NVFX_FP_OP_INPUT_SRC_TC(hw); 949 fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw); 950 return true; 951 } 952 } 953 return false; 954 default: 955 return true; 956 } 957 } 958 959 static bool 960 nvfx_fragprog_parse_decl_output(struct nvfx_fpc *fpc, 961 const struct tgsi_full_declaration *fdec) 962 { 963 unsigned idx = fdec->Range.First; 964 unsigned hw; 965 966 switch (fdec->Semantic.Name) { 967 case TGSI_SEMANTIC_POSITION: 968 hw = 1; 969 break; 970 case TGSI_SEMANTIC_COLOR: 971 hw = ~0; 972 switch (fdec->Semantic.Index) { 973 case 0: hw = 0; break; 974 case 1: hw = 2; break; 975 case 2: hw = 3; break; 976 case 3: hw = 4; break; 977 } 978 if(hw > ((fpc->is_nv4x) ? 
4 : 2)) { 979 NOUVEAU_ERR("bad rcol index\n"); 980 return false; 981 } 982 break; 983 default: 984 NOUVEAU_ERR("bad output semantic\n"); 985 return false; 986 } 987 988 fpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw); 989 fpc->r_temps |= (1ULL << hw); 990 return true; 991 } 992 993 static bool 994 nvfx_fragprog_prepare(struct nvfx_fpc *fpc) 995 { 996 struct tgsi_parse_context p; 997 int high_temp = -1, i; 998 999 fpc->r_imm = CALLOC(fpc->fp->info.immediate_count, sizeof(struct nvfx_reg)); 1000 1001 tgsi_parse_init(&p, fpc->fp->pipe.tokens); 1002 while (!tgsi_parse_end_of_tokens(&p)) { 1003 const union tgsi_full_token *tok = &p.FullToken; 1004 1005 tgsi_parse_token(&p); 1006 switch(tok->Token.Type) { 1007 case TGSI_TOKEN_TYPE_DECLARATION: 1008 { 1009 const struct tgsi_full_declaration *fdec; 1010 fdec = &p.FullToken.FullDeclaration; 1011 switch (fdec->Declaration.File) { 1012 case TGSI_FILE_INPUT: 1013 if (!nvfx_fragprog_parse_decl_input(fpc, fdec)) 1014 goto out_err; 1015 break; 1016 case TGSI_FILE_OUTPUT: 1017 if (!nvfx_fragprog_parse_decl_output(fpc, fdec)) 1018 goto out_err; 1019 break; 1020 case TGSI_FILE_TEMPORARY: 1021 if (fdec->Range.Last > high_temp) { 1022 high_temp = 1023 fdec->Range.Last; 1024 } 1025 break; 1026 default: 1027 break; 1028 } 1029 } 1030 break; 1031 case TGSI_TOKEN_TYPE_IMMEDIATE: 1032 { 1033 struct tgsi_full_immediate *imm; 1034 1035 imm = &p.FullToken.FullImmediate; 1036 assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); 1037 assert(fpc->nr_imm < fpc->fp->info.immediate_count); 1038 1039 fpc->r_imm[fpc->nr_imm++] = nvfx_fp_imm(fpc, imm->u[0].Float, imm->u[1].Float, imm->u[2].Float, imm->u[3].Float); 1040 break; 1041 } 1042 default: 1043 break; 1044 } 1045 } 1046 tgsi_parse_free(&p); 1047 1048 tgsi_parse_init(&p, fpc->fp->pipe.tokens); 1049 while (!tgsi_parse_end_of_tokens(&p)) { 1050 const struct tgsi_full_declaration *fdec; 1051 tgsi_parse_token(&p); 1052 switch(p.FullToken.Token.Type) { 1053 case TGSI_TOKEN_TYPE_DECLARATION: 1054 
/* (continuation of nvfx_fragprog_prepare(): body of the pass-2
 * DECLARATION case) */
         fdec = &p.FullToken.FullDeclaration;
         switch (fdec->Declaration.File) {
         case TGSI_FILE_INPUT:
            if (!nvfx_fragprog_assign_generic(fpc, fdec))
               goto out_err;
            break;
         default:
            break;
         }
         break;
      default:
         break;
      }
   }
   tgsi_parse_free(&p);

   /* high_temp holds the highest declared TGSI temp index (-1 if none);
    * pre-increment turns it into a count.  Pre-allocate one hardware
    * temp per TGSI temp; these allocations are permanent, so clear the
    * per-instruction discard mask afterwards. */
   if (++high_temp) {
      fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg));
      for (i = 0; i < high_temp; i++)
         fpc->r_temp[i] = temp(fpc);
      fpc->r_temps_discard = 0ULL;
   }

   return true;

out_err:
   FREE(fpc->r_temp);
   fpc->r_temp = NULL;

   tgsi_parse_free(&p);
   return false;
}

/* NVFX_DUMP_FP=1 in the environment dumps the TGSI source and the
 * generated hardware program (see the bottom of
 * _nvfx_fragprog_translate) */
DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", false)

/* Translate the TGSI fragment program in fp->pipe.tokens into nv30/nv40
 * hardware instructions stored in fp->insn.  oclass selects the
 * hardware generation (>= NV40_3D_CLASS enables the nv4x paths).  On
 * success fp->translated is set to true; on failure the TGSI is dumped
 * and the function returns with fp->translated still false.  The
 * compile context (fpc) is freed on both paths. */
void
_nvfx_fragprog_translate(uint16_t oclass, struct nv30_fragprog *fp)
{
   struct tgsi_parse_context parse;
   struct nvfx_fpc *fpc = NULL;
   struct util_dynarray insns;

   fp->translated = false;
   fp->point_sprite_control = 0;
   fp->vp_or = 0;

   /* NOTE(review): a goto out_err before tgsi_parse_init(&parse) below
    * reaches tgsi_parse_free(&parse) on an uninitialized parse context
    * -- benign only if tgsi_parse_free tolerates that; confirm. */
   fpc = CALLOC_STRUCT(nvfx_fpc);
   if (!fpc)
      goto out_err;

   fpc->is_nv4x = (oclass >= NV40_3D_CLASS) ? ~0 : 0;
   /* nv4x exposes 48 temp registers, nv3x 32 */
   fpc->max_temps = fpc->is_nv4x ?
48 : 32;
   /* (continuation of _nvfx_fragprog_translate(): the "48 : 32;" above
    * completes the max_temps ternary started in the previous chunk) */
   fpc->fp = fp;
   /* NOTE(review): the initial register count of 2 appears to be a
    * hardware baseline (result regs always counted) -- confirm against
    * the nv30/nv40 docs */
   fpc->num_regs = 2;
   /* fill texcoord[] with 0xffff == "slot free" (consumed by the
    * input-declaration handling in nvfx_fragprog_prepare) */
   memset(fp->texcoord, 0xff, sizeof(fp->texcoord));

   /* window coordinate conventions and MRT broadcast requested by the
    * shader's TGSI properties */
   if (fp->info.properties[TGSI_PROPERTY_FS_COORD_ORIGIN])
      fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED;
   if (fp->info.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER])
      fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_CENTER_INTEGER;
   if (fp->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
      fp->rt_enable |= NV30_3D_RT_ENABLE_MRT;

   if (!nvfx_fragprog_prepare(fpc))
      goto out_err;

   tgsi_parse_init(&parse, fp->pipe.tokens);
   util_dynarray_init(&insns);

   /* main translation loop: emit hardware code per TGSI instruction,
    * recording the hw offset of each one so branch targets can be
    * resolved afterwards */
   while (!tgsi_parse_end_of_tokens(&parse)) {
      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION:
      {
         const struct tgsi_full_instruction *finst;

         util_dynarray_append(&insns, unsigned, fp->insn_len);
         finst = &parse.FullToken.FullInstruction;
         if (!nvfx_fragprog_parse_instruction(fpc, finst))
            goto out_err;
      }
      break;
      default:
         break;
      }
   }
   /* one extra entry so branches can target "end of program" */
   util_dynarray_append(&insns, unsigned, fp->insn_len);

   /* resolve label relocations: OR each branch target's hw offset into
    * the instruction word recorded at reloc->location */
   for(unsigned i = 0; i < fpc->label_relocs.size; i += sizeof(struct nvfx_relocation))
   {
      struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)fpc->label_relocs.data + i);
      fp->insn[label_reloc->location] |= ((unsigned*)insns.data)[label_reloc->target];
   }
   util_dynarray_fini(&insns);

   /* encode the temp-register count into the control word; the field
    * layout differs between nv3x and nv4x */
   if(!fpc->is_nv4x)
      fp->fp_control |= (fpc->num_regs-1)/2;
   else
      fp->fp_control |= fpc->num_regs << NV40_3D_FP_CONTROL_TEMP_COUNT__SHIFT;

   /* Terminate final instruction */
   if(fp->insn)
      fp->insn[fpc->inst_offset] |= 0x00000001;

   /* Append NOP + END instruction for branches to the end of the program */
   fpc->inst_offset = fp->insn_len;
   grow_insns(fpc, 4);
   fp->insn[fpc->inst_offset + 0] = 0x00000001;
   fp->insn[fpc->inst_offset + 1] = 0x00000000;
   fp->insn[fpc->inst_offset + 2] = 0x00000000;
   fp->insn[fpc->inst_offset + 3] = 0x00000000;

   /* optional debug dump of both the TGSI source and the hw program */
   if(debug_get_option_nvfx_dump_fp())
   {
      debug_printf("\n");
      tgsi_dump(fp->pipe.tokens, 0);

      debug_printf("\n%s fragment program:\n", fpc->is_nv4x ? "nv4x" : "nv3x");
      for (unsigned i = 0; i < fp->insn_len; i += 4)
         debug_printf("%3u: %08x %08x %08x %08x\n", i >> 2, fp->insn[i], fp->insn[i + 1], fp->insn[i + 2], fp->insn[i + 3]);
      debug_printf("\n");
   }

   fp->translated = true;

out:
   /* common exit: release the compile-time context and scratch arrays
    * on both the success and error paths */
   tgsi_parse_free(&parse);
   if (fpc)
   {
      FREE(fpc->r_temp);
      FREE(fpc->r_imm);
      util_dynarray_fini(&fpc->if_stack);
      util_dynarray_fini(&fpc->label_relocs);
      util_dynarray_fini(&fpc->imm_data);
      //util_dynarray_fini(&fpc->loop_stack);
      FREE(fpc);
   }

   return;

out_err:
   _debug_printf("Error: failed to compile this fragment program:\n");
   tgsi_dump(fp->pipe.tokens, 0);
   goto out;
}