#include <float.h>
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_linkage.h"
#include "util/u_inlines.h"
#include "util/u_debug.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_ureg.h"

#include "nv30-40_3d.xml.h"
#include "nv30_context.h"
#include "nvfx_shader.h"

struct nvfx_fpc {
    struct nv30_fragprog *fp;

    unsigned max_temps;
    unsigned long long r_temps;
    unsigned long long r_temps_discard;
    struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS];
    struct nvfx_reg r_input[PIPE_MAX_SHADER_INPUTS];
    struct nvfx_reg *r_temp;

    int num_regs;

    unsigned inst_offset;
    unsigned have_const;

    struct util_dynarray imm_data;

    struct nvfx_reg *r_imm;
    unsigned nr_imm;

    struct util_dynarray if_stack;
    //struct util_dynarray loop_stack;
    struct util_dynarray label_relocs;
};

static INLINE struct nvfx_reg
temp(struct nvfx_fpc *fpc)
{
    int idx = __builtin_ctzll(~fpc->r_temps);

    if (idx >= fpc->max_temps) {
        NOUVEAU_ERR("out of temps!!\n");
        assert(0);
        return nvfx_reg(NVFXSR_TEMP, 0);
    }

    fpc->r_temps |= (1ULL << idx);
    fpc->r_temps_discard |= (1ULL << idx);
    return nvfx_reg(NVFXSR_TEMP, idx);
}

static INLINE void
release_temps(struct nvfx_fpc *fpc)
{
    fpc->r_temps &= ~fpc->r_temps_discard;
    fpc->r_temps_discard = 0ULL;
}

static inline struct nvfx_reg
nvfx_fp_imm(struct nvfx_fpc *fpc, float a, float b, float c, float d)
{
    float v[4] = {a, b, c, d};
    int idx = fpc->imm_data.size >> 4;

    memcpy(util_dynarray_grow(&fpc->imm_data, sizeof(float) * 4), v, 4 * sizeof(float));
    return nvfx_reg(NVFXSR_IMM, idx);
}

static void
grow_insns(struct nvfx_fpc *fpc, int size)
{
    struct nv30_fragprog *fp = fpc->fp;

    fp->insn_len += size;
    fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
}

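/*
 * A hardware instruction is four 32-bit words: hw[0] carries the opcode,
 * output register and write mask, and each source operand is packed into
 * one of hw[1..3] (alongside the condition-code fields in hw[1] and the
 * result scale in hw[2]).  When a source is an immediate or an engine
 * constant, an extra 4-word slot holding the value is appended directly
 * after the instruction (the grow_insns(fpc, 4) in the IMM/CONST cases
 * below) and the operand is encoded as NVFX_FP_REG_TYPE_CONST.
 */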
static void
emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_src src)
{
    struct nv30_fragprog *fp = fpc->fp;
    uint32_t *hw = &fp->insn[fpc->inst_offset];
    uint32_t sr = 0;

    switch (src.reg.type) {
    case NVFXSR_INPUT:
        sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
        hw[0] |= (src.reg.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
        break;
    case NVFXSR_OUTPUT:
        sr |= NVFX_FP_REG_SRC_HALF;
        /* fall-through */
    case NVFXSR_TEMP:
        sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
        sr |= (src.reg.index << NVFX_FP_REG_SRC_SHIFT);
        break;
    case NVFXSR_IMM:
        if (!fpc->have_const) {
            grow_insns(fpc, 4);
            hw = &fp->insn[fpc->inst_offset];
            fpc->have_const = 1;
        }

        memcpy(&fp->insn[fpc->inst_offset + 4],
               (float*)fpc->imm_data.data + src.reg.index * 4,
               sizeof(uint32_t) * 4);

        sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
        break;
    case NVFXSR_CONST:
        if (!fpc->have_const) {
            grow_insns(fpc, 4);
            hw = &fp->insn[fpc->inst_offset];
            fpc->have_const = 1;
        }

        {
            struct nv30_fragprog_data *fpd;

            fp->consts = realloc(fp->consts, ++fp->nr_consts *
                                 sizeof(*fpd));
            fpd = &fp->consts[fp->nr_consts - 1];
            fpd->offset = fpc->inst_offset + 4;
            fpd->index = src.reg.index;
            memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
        }

        sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
        break;
    case NVFXSR_NONE:
        sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
        break;
    default:
        assert(0);
    }

    if (src.negate)
        sr |= NVFX_FP_REG_NEGATE;

    if (src.abs)
        hw[1] |= (1 << (29 + pos));

    sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) |
           (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) |
           (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) |
           (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));

    hw[pos + 1] |= sr;
}

static void
emit_dst(struct nvfx_fpc *fpc, struct nvfx_reg dst)
{
    struct nv30_fragprog *fp = fpc->fp;
    uint32_t *hw = &fp->insn[fpc->inst_offset];

    switch (dst.type) {
    case NVFXSR_OUTPUT:
        if (dst.index == 1)
            fp->fp_control |= 0x0000000e;
        else {
            hw[0] |= NVFX_FP_OP_OUT_REG_HALF;
            dst.index <<= 1;
        }
        /* fall-through */
    case NVFXSR_TEMP:
        if (fpc->num_regs < (dst.index + 1))
            fpc->num_regs = dst.index + 1;
        break;
    case NVFXSR_NONE:
        hw[0] |= (1 << 30); /* i.e. NV40_FP_OP_OUT_NONE */
        break;
    default:
        assert(0);
    }

    hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);
}

static void
nvfx_fp_emit(struct nvfx_fpc *fpc, struct nvfx_insn insn)
{
    struct nv30_fragprog *fp = fpc->fp;
    uint32_t *hw;

    fpc->inst_offset = fp->insn_len;
    fpc->have_const = 0;
    grow_insns(fpc, 4);
    hw = &fp->insn[fpc->inst_offset];
    memset(hw, 0, sizeof(uint32_t) * 4);

    if (insn.op == NVFX_FP_OP_OPCODE_KIL)
        fp->fp_control |= NV30_3D_FP_CONTROL_USES_KIL;
    hw[0] |= (insn.op << NVFX_FP_OP_OPCODE_SHIFT);
    hw[0] |= (insn.mask << NVFX_FP_OP_OUTMASK_SHIFT);
    hw[2] |= (insn.scale << NVFX_FP_OP_DST_SCALE_SHIFT);

    if (insn.sat)
        hw[0] |= NVFX_FP_OP_OUT_SAT;

    if (insn.cc_update)
        hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
    hw[1] |= (insn.cc_test << NVFX_FP_OP_COND_SHIFT);
    hw[1] |= ((insn.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
              (insn.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
              (insn.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
              (insn.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));

    if (insn.unit >= 0)
        hw[0] |= (insn.unit << NVFX_FP_OP_TEX_UNIT_SHIFT);

    emit_dst(fpc, insn.dst);
    emit_src(fpc, 0, insn.src[0]);
    emit_src(fpc, 1, insn.src[1]);
    emit_src(fpc, 2, insn.src[2]);
}

#define arith(s,o,d,m,s0,s1,s2) \
    nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, -1, \
              (d), (m), (s0), (s1), (s2))

/* note: s1 and s2 are accepted for symmetry with arith() but ignored;
 * texture ops only take one source here */
#define tex(s,o,u,d,m,s0,s1,s2) \
    nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, (u), \
              (d), (m), (s0), none, none)

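/*
 * NV40-only control flow follows.  Branch instructions are also four words,
 * but their targets are generally not known when they are emitted, so each
 * helper records a struct nvfx_relocation (location = word to patch,
 * target = TGSI instruction label) in fpc->label_relocs; the words are
 * patched once the whole program has been emitted (see
 * _nvfx_fragprog_translate).
 */
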
/* IF src.x != 0, as TGSI specifies */
static void
nv40_fp_if(struct nvfx_fpc *fpc, struct nvfx_src src)
{
    const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
    struct nvfx_insn insn = arith(0, MOV, none.reg, NVFX_FP_MASK_X, src, none, none);
    uint32_t *hw;
    insn.cc_update = 1;
    nvfx_fp_emit(fpc, insn);

    fpc->inst_offset = fpc->fp->insn_len;
    grow_insns(fpc, 4);
    hw = &fpc->fp->insn[fpc->inst_offset];
    /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
    hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) |
            NV40_FP_OP_OUT_NONE |
            (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
    /* Use the .xxxx swizzle so that we check only src[0].x */
    hw[1] = (0 << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
            (0 << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
            (0 << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
            (0 << NVFX_FP_OP_COND_SWZ_W_SHIFT) |
            (NVFX_FP_OP_COND_NE << NVFX_FP_OP_COND_SHIFT);
    hw[2] = 0; /* | NV40_FP_OP_OPCODE_IS_BRANCH | else_offset */
    hw[3] = 0; /* | endif_offset */
    util_dynarray_append(&fpc->if_stack, unsigned, fpc->inst_offset);
}

static void
nv40_fp_cal(struct nvfx_fpc *fpc, unsigned target)
{
    struct nvfx_relocation reloc;
    uint32_t *hw;
    fpc->inst_offset = fpc->fp->insn_len;
    grow_insns(fpc, 4);
    hw = &fpc->fp->insn[fpc->inst_offset];
    hw[0] = (NV40_FP_OP_BRA_OPCODE_CAL << NVFX_FP_OP_OPCODE_SHIFT);
    /* condition is always true: identity swizzle with a TR test */
    hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
            (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
    hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */
    hw[3] = 0;
    reloc.target = target;
    reloc.location = fpc->inst_offset + 2;
    util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
}

static void
nv40_fp_ret(struct nvfx_fpc *fpc)
{
    uint32_t *hw;
    fpc->inst_offset = fpc->fp->insn_len;
    grow_insns(fpc, 4);
    hw = &fpc->fp->insn[fpc->inst_offset];
    hw[0] = (NV40_FP_OP_BRA_OPCODE_RET << NVFX_FP_OP_OPCODE_SHIFT);
    /* condition is always true: identity swizzle with a TR test */
    hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
            (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
    hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH;
    hw[3] = 0;
}

static void
nv40_fp_rep(struct nvfx_fpc *fpc, unsigned count, unsigned target)
{
    struct nvfx_relocation reloc;
    uint32_t *hw;
    fpc->inst_offset = fpc->fp->insn_len;
    grow_insns(fpc, 4);
    hw = &fpc->fp->insn[fpc->inst_offset];
    /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
    hw[0] = (NV40_FP_OP_BRA_OPCODE_REP << NVFX_FP_OP_OPCODE_SHIFT) |
            NV40_FP_OP_OUT_NONE |
            (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
    /* condition is always true: identity swizzle with a TR test */
    hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
            (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
    hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH |
            (count << NV40_FP_OP_REP_COUNT1_SHIFT) |
            (count << NV40_FP_OP_REP_COUNT2_SHIFT) |
            (count << NV40_FP_OP_REP_COUNT3_SHIFT);
    hw[3] = 0; /* | end_offset */
    reloc.target = target;
    reloc.location = fpc->inst_offset + 3;
    util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
    //util_dynarray_append(&fpc->loop_stack, unsigned, target);
}

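/* An IF whose condition is FL (always false) never executes its "then"
 * side, so pointing both the else_offset and the endif_offset at the same
 * target presumably yields an unconditional forward branch. */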
/* warning: this only works forward, and probably only if not inside any IF */
static void
nv40_fp_bra(struct nvfx_fpc *fpc, unsigned target)
{
    struct nvfx_relocation reloc;
    uint32_t *hw;
    fpc->inst_offset = fpc->fp->insn_len;
    grow_insns(fpc, 4);
    hw = &fpc->fp->insn[fpc->inst_offset];
    /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
    hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) |
            NV40_FP_OP_OUT_NONE |
            (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT);
    /* condition is always false: identity swizzle with an FL test */
    hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
            (NVFX_FP_OP_COND_FL << NVFX_FP_OP_COND_SHIFT);
    hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | else_offset */
    hw[3] = 0; /* | endif_offset */
    reloc.target = target;
    reloc.location = fpc->inst_offset + 2;
    util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
    reloc.target = target;
    reloc.location = fpc->inst_offset + 3;
    util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
}

static void
nv40_fp_brk(struct nvfx_fpc *fpc)
{
    uint32_t *hw;
    fpc->inst_offset = fpc->fp->insn_len;
    grow_insns(fpc, 4);
    hw = &fpc->fp->insn[fpc->inst_offset];
    hw[0] = (NV40_FP_OP_BRA_OPCODE_BRK << NVFX_FP_OP_OPCODE_SHIFT) |
            NV40_FP_OP_OUT_NONE;
    /* condition is always true: identity swizzle with a TR test */
    hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) |
            (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT);
    hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH;
    hw[3] = 0;
}

static INLINE struct nvfx_src
tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
{
    struct nvfx_src src;

    switch (fsrc->Register.File) {
    case TGSI_FILE_INPUT:
        src.reg = fpc->r_input[fsrc->Register.Index];
        break;
    case TGSI_FILE_CONSTANT:
        src.reg = nvfx_reg(NVFXSR_CONST, fsrc->Register.Index);
        break;
    case TGSI_FILE_IMMEDIATE:
        assert(fsrc->Register.Index < fpc->nr_imm);
        src.reg = fpc->r_imm[fsrc->Register.Index];
        break;
    case TGSI_FILE_TEMPORARY:
        src.reg = fpc->r_temp[fsrc->Register.Index];
        break;
    /* NV40 fragprog result regs are just temps, so this is simple */
    case TGSI_FILE_OUTPUT:
        src.reg = fpc->r_result[fsrc->Register.Index];
        break;
    default:
        NOUVEAU_ERR("bad src file\n");
        src.reg.index = 0;
        src.reg.type = 0;
        break;
    }

    src.abs = fsrc->Register.Absolute;
    src.negate = fsrc->Register.Negate;
    src.swz[0] = fsrc->Register.SwizzleX;
    src.swz[1] = fsrc->Register.SwizzleY;
    src.swz[2] = fsrc->Register.SwizzleZ;
    src.swz[3] = fsrc->Register.SwizzleW;
    src.indirect = 0;
    src.indirect_reg = 0;
    src.indirect_swz = 0;
    return src;
}

static INLINE struct nvfx_reg
tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst)
{
    switch (fdst->Register.File) {
    case TGSI_FILE_OUTPUT:
        return fpc->r_result[fdst->Register.Index];
    case TGSI_FILE_TEMPORARY:
        return fpc->r_temp[fdst->Register.Index];
    case TGSI_FILE_NULL:
        return nvfx_reg(NVFXSR_NONE, 0);
    default:
        NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
        return nvfx_reg(NVFXSR_NONE, 0);
    }
}

static INLINE int
tgsi_mask(uint tgsi)
{
    int mask = 0;

    if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X;
    if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y;
    if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z;
    if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W;
    return mask;
}

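/*
 * The hardware can read at most one distinct register per instruction from
 * each of the input and constant files (constants and immediates share the
 * single embedded constant slot).  The source-gathering loop below
 * therefore tracks the first index used from each file (ai/ci/ii) and
 * copies any further ones into a temporary with an extra MOV.
 */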
static boolean
nvfx_fragprog_parse_instruction(struct nv30_context* nvfx, struct nvfx_fpc *fpc,
                                const struct tgsi_full_instruction *finst)
{
    const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
    struct nvfx_insn insn;
    struct nvfx_src src[3], tmp;
    struct nvfx_reg dst;
    int mask, sat, unit = 0;
    int ai = -1, ci = -1, ii = -1;
    int i;

    if (finst->Instruction.Opcode == TGSI_OPCODE_END)
        return TRUE;

    for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
        const struct tgsi_full_src_register *fsrc;

        fsrc = &finst->Src[i];
        if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
            src[i] = tgsi_src(fpc, fsrc);
        }
    }

    for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
        const struct tgsi_full_src_register *fsrc;

        fsrc = &finst->Src[i];

        switch (fsrc->Register.File) {
        case TGSI_FILE_INPUT:
            if (fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG &&
                (fsrc->Register.SwizzleX == PIPE_SWIZZLE_ALPHA ||
                 fsrc->Register.SwizzleY == PIPE_SWIZZLE_ALPHA ||
                 fsrc->Register.SwizzleZ == PIPE_SWIZZLE_ALPHA ||
                 fsrc->Register.SwizzleW == PIPE_SWIZZLE_ALPHA)) {
                /* hardware puts 0 in fogcoord.w, but GL/Gallium want 1 there */
                struct nvfx_src addend = nvfx_src(nvfx_fp_imm(fpc, 0, 0, 0, 1));
                addend.swz[0] = fsrc->Register.SwizzleX;
                addend.swz[1] = fsrc->Register.SwizzleY;
                addend.swz[2] = fsrc->Register.SwizzleZ;
                addend.swz[3] = fsrc->Register.SwizzleW;
                src[i] = nvfx_src(temp(fpc));
                nvfx_fp_emit(fpc, arith(0, ADD, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), addend, none));
            } else if (ai == -1 || ai == fsrc->Register.Index) {
                ai = fsrc->Register.Index;
                src[i] = tgsi_src(fpc, fsrc);
            } else {
                src[i] = nvfx_src(temp(fpc));
                nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
            }
            break;
        case TGSI_FILE_CONSTANT:
            if ((ci == -1 && ii == -1) ||
                ci == fsrc->Register.Index) {
                ci = fsrc->Register.Index;
                src[i] = tgsi_src(fpc, fsrc);
            } else {
                src[i] = nvfx_src(temp(fpc));
                nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
            }
            break;
        case TGSI_FILE_IMMEDIATE:
            if ((ci == -1 && ii == -1) ||
                ii == fsrc->Register.Index) {
                ii = fsrc->Register.Index;
                src[i] = tgsi_src(fpc, fsrc);
            } else {
                src[i] = nvfx_src(temp(fpc));
                nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
            }
            break;
        case TGSI_FILE_TEMPORARY:
            /* handled above */
            break;
        case TGSI_FILE_SAMPLER:
            unit = fsrc->Register.Index;
            break;
        case TGSI_FILE_OUTPUT:
            break;
        default:
            NOUVEAU_ERR("bad src file\n");
            return FALSE;
        }
    }

    dst = tgsi_dst(fpc, &finst->Dst[0]);
    mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
    sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);

    switch (finst->Instruction.Opcode) {
    case TGSI_OPCODE_ABS:
        nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, abs(src[0]), none, none));
        break;
    case TGSI_OPCODE_ADD:
        nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_CEIL:
        tmp = nvfx_src(temp(fpc));
        nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, neg(src[0]), none, none));
        nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, neg(tmp), none, none));
        break;
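    /* CMP writes the condition register from src0, then does two predicated
     * MOVs: dst = (src0 < 0) ? src1 : src2, per component. */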
    case TGSI_OPCODE_CMP:
        insn = arith(0, MOV, none.reg, mask, src[0], none, none);
        insn.cc_update = 1;
        nvfx_fp_emit(fpc, insn);

        insn = arith(sat, MOV, dst, mask, src[2], none, none);
        insn.cc_test = NVFX_COND_GE;
        nvfx_fp_emit(fpc, insn);

        insn = arith(sat, MOV, dst, mask, src[1], none, none);
        insn.cc_test = NVFX_COND_LT;
        nvfx_fp_emit(fpc, insn);
        break;
    case TGSI_OPCODE_COS:
        nvfx_fp_emit(fpc, arith(sat, COS, dst, mask, src[0], none, none));
        break;
    case TGSI_OPCODE_DDX:
        if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
            tmp = nvfx_src(temp(fpc));
            nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
            nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
            nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
            nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
        } else {
            nvfx_fp_emit(fpc, arith(sat, DDX, dst, mask, src[0], none, none));
        }
        break;
    case TGSI_OPCODE_DDY:
        if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
            tmp = nvfx_src(temp(fpc));
            nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
            nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
            nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
            nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
        } else {
            nvfx_fp_emit(fpc, arith(sat, DDY, dst, mask, src[0], none, none));
        }
        break;
    case TGSI_OPCODE_DP2:
        tmp = nvfx_src(temp(fpc));
        nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], src[1], none));
        nvfx_fp_emit(fpc, arith(0, ADD, dst, mask, swz(tmp, X, X, X, X), swz(tmp, Y, Y, Y, Y), none));
        break;
    case TGSI_OPCODE_DP3:
        nvfx_fp_emit(fpc, arith(sat, DP3, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_DP4:
        nvfx_fp_emit(fpc, arith(sat, DP4, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_DPH:
        tmp = nvfx_src(temp(fpc));
        nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[1], none));
        nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, swz(tmp, X, X, X, X), swz(src[1], W, W, W, W), none));
        break;
    case TGSI_OPCODE_DST:
        nvfx_fp_emit(fpc, arith(sat, DST, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_EX2:
        nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, src[0], none, none));
        break;
    case TGSI_OPCODE_FLR:
        nvfx_fp_emit(fpc, arith(sat, FLR, dst, mask, src[0], none, none));
        break;
    case TGSI_OPCODE_FRC:
        nvfx_fp_emit(fpc, arith(sat, FRC, dst, mask, src[0], none, none));
        break;
    case TGSI_OPCODE_KILP:
        nvfx_fp_emit(fpc, arith(0, KIL, none.reg, 0, none, none, none));
        break;
    case TGSI_OPCODE_KIL:
        insn = arith(0, MOV, none.reg, NVFX_FP_MASK_ALL, src[0], none, none);
        insn.cc_update = 1;
        nvfx_fp_emit(fpc, insn);

        insn = arith(0, KIL, none.reg, 0, none, none, none);
        insn.cc_test = NVFX_COND_LT;
        nvfx_fp_emit(fpc, insn);
        break;
    case TGSI_OPCODE_LG2:
        nvfx_fp_emit(fpc, arith(sat, LG2, dst, mask, src[0], none, none));
        break;
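    /* nv3x has a native LIT; on nv4x it is synthesized from MAX, LG2 and
     * MUL plus the combined LITEX2_NV40 op, presumably computing the
     * specular term as 2^(w * log2(max(y, FLT_MIN))) -- see the note on
     * FLT_MIN below. */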
    case TGSI_OPCODE_LIT:
        if (!nvfx->is_nv4x)
            nvfx_fp_emit(fpc, arith(sat, LIT_NV30, dst, mask, src[0], none, none));
        else {
            /* we use FLT_MIN, so that log2 never gives -infinity, and thus multiplication by
             * specular 0 always gives 0, so that ex2 gives 1, to satisfy the 0^0 = 1 requirement
             *
             * NOTE: if we start using half precision, we might need an fp16 FLT_MIN here instead
             */
            struct nvfx_src maxs = nvfx_src(nvfx_fp_imm(fpc, 0, FLT_MIN, 0, 0));
            tmp = nvfx_src(temp(fpc));
            if (ci >= 0 || ii >= 0) {
                nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, maxs, none, none));
                maxs = tmp;
            }
            nvfx_fp_emit(fpc, arith(0, MAX, tmp.reg, NVFX_FP_MASK_Y | NVFX_FP_MASK_W, swz(src[0], X, X, X, Y), swz(maxs, X, X, Y, Y), none));
            nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), none, none));
            nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), swz(src[0], W, W, W, W), none));
            nvfx_fp_emit(fpc, arith(sat, LITEX2_NV40, dst, mask, swz(tmp, Y, Y, W, W), none, none));
        }
        break;
    case TGSI_OPCODE_LRP:
        if (!nvfx->is_nv4x)
            nvfx_fp_emit(fpc, arith(sat, LRP_NV30, dst, mask, src[0], src[1], src[2]));
        else {
            tmp = nvfx_src(temp(fpc));
            nvfx_fp_emit(fpc, arith(0, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2]));
            nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], tmp));
        }
        break;
    case TGSI_OPCODE_MAD:
        nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], src[2]));
        break;
    case TGSI_OPCODE_MAX:
        nvfx_fp_emit(fpc, arith(sat, MAX, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_MIN:
        nvfx_fp_emit(fpc, arith(sat, MIN, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_MOV:
        nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, src[0], none, none));
        break;
    case TGSI_OPCODE_MUL:
        nvfx_fp_emit(fpc, arith(sat, MUL, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_NOP:
        break;
    case TGSI_OPCODE_POW:
        if (!nvfx->is_nv4x)
            nvfx_fp_emit(fpc, arith(sat, POW_NV30, dst, mask, src[0], src[1], none));
        else {
            tmp = nvfx_src(temp(fpc));
            nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
            nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none));
            nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, swz(tmp, X, X, X, X), none, none));
        }
        break;
    case TGSI_OPCODE_RCP:
        nvfx_fp_emit(fpc, arith(sat, RCP, dst, mask, src[0], none, none));
        break;
    case TGSI_OPCODE_RFL:
        if (!nvfx->is_nv4x)
            nvfx_fp_emit(fpc, arith(0, RFL_NV30, dst, mask, src[0], src[1], none));
        else {
            tmp = nvfx_src(temp(fpc));
            nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[0], none));
            nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_Y, src[0], src[1], none));
            insn = arith(0, DIV, tmp.reg, NVFX_FP_MASK_Z, swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
            insn.scale = NVFX_FP_OP_DST_SCALE_2X;
            nvfx_fp_emit(fpc, insn);
            nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])));
        }
        break;
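    /* RSQ on nv4x: rsq(x) = 2^(-log2(|x|) / 2); DST_SCALE_INV_2X presumably
     * halves the LG2 result, and the EX2 source negate supplies the minus
     * sign, giving |x|^-0.5. */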
    case TGSI_OPCODE_RSQ:
        if (!nvfx->is_nv4x)
            nvfx_fp_emit(fpc, arith(sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none));
        else {
            tmp = nvfx_src(temp(fpc));
            insn = arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, abs(swz(src[0], X, X, X, X)), none, none);
            insn.scale = NVFX_FP_OP_DST_SCALE_INV_2X;
            nvfx_fp_emit(fpc, insn);
            nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none));
        }
        break;
    case TGSI_OPCODE_SCS:
        /* avoid overwriting the source */
        if (src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X) {
            if (mask & NVFX_FP_MASK_X)
                nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
            if (mask & NVFX_FP_MASK_Y)
                nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
        } else {
            if (mask & NVFX_FP_MASK_Y)
                nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
            if (mask & NVFX_FP_MASK_X)
                nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
        }
        break;
    case TGSI_OPCODE_SEQ:
        nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_SFL:
        nvfx_fp_emit(fpc, arith(sat, SFL, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_SGE:
        nvfx_fp_emit(fpc, arith(sat, SGE, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_SGT:
        nvfx_fp_emit(fpc, arith(sat, SGT, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_SIN:
        nvfx_fp_emit(fpc, arith(sat, SIN, dst, mask, src[0], none, none));
        break;
    case TGSI_OPCODE_SLE:
        nvfx_fp_emit(fpc, arith(sat, SLE, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_SLT:
        nvfx_fp_emit(fpc, arith(sat, SLT, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_SNE:
        nvfx_fp_emit(fpc, arith(sat, SNE, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_SSG:
    {
        struct nvfx_src minones = swz(nvfx_src(nvfx_fp_imm(fpc, -1, -1, -1, -1)), X, X, X, X);

        insn = arith(sat, MOV, dst, mask, src[0], none, none);
        insn.cc_update = 1;
        nvfx_fp_emit(fpc, insn);

        insn = arith(0, STR, dst, mask, none, none, none);
        insn.cc_test = NVFX_COND_GT;
        nvfx_fp_emit(fpc, insn);

        if (!sat) {
            insn = arith(0, MOV, dst, mask, minones, none, none);
            insn.cc_test = NVFX_COND_LT;
            nvfx_fp_emit(fpc, insn);
        }
        break;
    }
    case TGSI_OPCODE_STR:
        nvfx_fp_emit(fpc, arith(sat, STR, dst, mask, src[0], src[1], none));
        break;
    case TGSI_OPCODE_SUB:
        nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], neg(src[1]), none));
        break;
    case TGSI_OPCODE_TEX:
        nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
        break;
    case TGSI_OPCODE_TRUNC:
        tmp = nvfx_src(temp(fpc));
        insn = arith(0, MOV, none.reg, mask, src[0], none, none);
        insn.cc_update = 1;
        nvfx_fp_emit(fpc, insn);

        nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, abs(src[0]), none, none));
        nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, tmp, none, none));

        insn = arith(sat, MOV, dst, mask, neg(tmp), none, none);
        insn.cc_test = NVFX_COND_LT;
        nvfx_fp_emit(fpc, insn);
        break;
    case TGSI_OPCODE_TXB:
        nvfx_fp_emit(fpc, tex(sat, TXB, unit, dst, mask, src[0], none, none));
        break;
    case TGSI_OPCODE_TXL:
        if (nvfx->is_nv4x)
            nvfx_fp_emit(fpc, tex(sat, TXL_NV40, unit, dst, mask, src[0], none, none));
        else /* unsupported on nv30, use TEX and hope they like it */
            nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
        break;
    case TGSI_OPCODE_TXP:
        nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none));
        break;
    case TGSI_OPCODE_XPD:
        tmp = nvfx_src(temp(fpc));
        nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
        nvfx_fp_emit(fpc, arith(sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
        break;

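    /* The control-flow opcodes below are only implemented for NV40-class
     * hardware; on nv3x they fall through to nv3x_cflow, which warns once
     * and ignores the instruction. */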
    case TGSI_OPCODE_IF:
        // MOVRC0 R31 (TR0.xyzw), R<src>:
        // IF (NE.xxxx) ELSE <else> END <end>
        if (!nvfx->use_nv4x)
            goto nv3x_cflow;
        nv40_fp_if(fpc, src[0]);
        break;

    case TGSI_OPCODE_ELSE:
    {
        uint32_t *hw;
        if (!nvfx->use_nv4x)
            goto nv3x_cflow;
        assert(util_dynarray_contains(&fpc->if_stack, unsigned));
        hw = &fpc->fp->insn[util_dynarray_top(&fpc->if_stack, unsigned)];
        hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
        break;
    }

    case TGSI_OPCODE_ENDIF:
    {
        uint32_t *hw;
        if (!nvfx->use_nv4x)
            goto nv3x_cflow;
        assert(util_dynarray_contains(&fpc->if_stack, unsigned));
        hw = &fpc->fp->insn[util_dynarray_pop(&fpc->if_stack, unsigned)];
        if (!hw[2])
            hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
        hw[3] = fpc->fp->insn_len;
        break;
    }

    case TGSI_OPCODE_BRA:
        /* This can in limited cases be implemented with an IF with the else and endif labels pointing to the target */
        /* no state tracker uses this, so don't implement this for now */
        assert(0);
        nv40_fp_bra(fpc, finst->Label.Label);
        break;

    case TGSI_OPCODE_BGNSUB:
    case TGSI_OPCODE_ENDSUB:
        /* nothing to do here */
        break;

    case TGSI_OPCODE_CAL:
        if (!nvfx->use_nv4x)
            goto nv3x_cflow;
        nv40_fp_cal(fpc, finst->Label.Label);
        break;

    case TGSI_OPCODE_RET:
        if (!nvfx->use_nv4x)
            goto nv3x_cflow;
        nv40_fp_ret(fpc);
        break;

    case TGSI_OPCODE_BGNLOOP:
        if (!nvfx->use_nv4x)
            goto nv3x_cflow;
        /* TODO: we should support using two nested REPs to allow a > 255 iteration count */
        nv40_fp_rep(fpc, 255, finst->Label.Label);
        break;

    case TGSI_OPCODE_ENDLOOP:
        break;

    case TGSI_OPCODE_BRK:
        if (!nvfx->use_nv4x)
            goto nv3x_cflow;
        nv40_fp_brk(fpc);
        break;

    case TGSI_OPCODE_CONT:
    {
        static int warned = 0;
        if (!warned) {
            NOUVEAU_ERR("Sorry, the continue keyword is not implemented: ignoring it.\n");
            warned = 1;
        }
        break;
    }

    default:
        NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
        return FALSE;
    }

out:
    release_temps(fpc);
    return TRUE;
nv3x_cflow:
    {
        static int warned = 0;
        if (!warned) {
            NOUVEAU_ERR(
                "Sorry, control flow instructions are not supported in hardware on nv3x: ignoring them\n"
                "If rendering is incorrect, try to disable GLSL support in the application.\n");
            warned = 1;
        }
    }
    goto out;
}

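/*
 * Input declarations map TGSI semantics onto fixed hardware input slots;
 * GENERIC 0-7 go straight to the matching texcoord.  GENERIC indices >= 8
 * are skipped here and given whatever texcoord slots remain by
 * nvfx_fragprog_assign_generic() in a later pass.
 */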
static boolean
nvfx_fragprog_parse_decl_input(struct nv30_context *nvfx, struct nvfx_fpc *fpc,
                               const struct tgsi_full_declaration *fdec)
{
    unsigned idx = fdec->Range.First;
    unsigned hw;

    switch (fdec->Semantic.Name) {
    case TGSI_SEMANTIC_POSITION:
        hw = NVFX_FP_OP_INPUT_SRC_POSITION;
        break;
    case TGSI_SEMANTIC_COLOR:
        hw = NVFX_FP_OP_INPUT_SRC_COL0 + fdec->Semantic.Index;
        break;
    case TGSI_SEMANTIC_FOG:
        hw = NVFX_FP_OP_INPUT_SRC_FOGC;
        break;
    case TGSI_SEMANTIC_FACE:
        hw = NV40_FP_OP_INPUT_SRC_FACING;
        break;
    case TGSI_SEMANTIC_GENERIC:
        if (fdec->Semantic.Index >= 8)
            return TRUE;

        fpc->fp->texcoord[fdec->Semantic.Index] = fdec->Semantic.Index;
        fpc->fp->texcoords |= (1 << fdec->Semantic.Index);
        fpc->fp->vp_or |= (0x00004000 << fdec->Semantic.Index);
        hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.Index);
        break;
    default:
        assert(0);
        return FALSE;
    }

    fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
    return TRUE;
}

static boolean
nvfx_fragprog_assign_generic(struct nv30_context *nvfx, struct nvfx_fpc *fpc,
                             const struct tgsi_full_declaration *fdec)
{
    unsigned num_texcoords = nvfx->use_nv4x ? 10 : 8;
    unsigned idx = fdec->Range.First;
    unsigned hw;

    switch (fdec->Semantic.Name) {
    case TGSI_SEMANTIC_GENERIC:
        if (fdec->Semantic.Index >= 8) {
            for (hw = 0; hw < num_texcoords; hw++) {
                if (fpc->fp->texcoord[hw] == 0xffff) {
                    fpc->fp->texcoord[hw] = fdec->Semantic.Index;
                    if (hw <= 7) {
                        fpc->fp->texcoords |= (0x1 << hw);
                        fpc->fp->vp_or |= (0x00004000 << hw);
                    } else {
                        fpc->fp->vp_or |= (0x00001000 << (hw - 8));
                    }
                    if (fdec->Semantic.Index == 9)
                        fpc->fp->point_sprite_control |= (0x00000100 << hw);
                    hw = NVFX_FP_OP_INPUT_SRC_TC(hw);
                    fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
                    return TRUE;
                }
            }
            return FALSE;
        }
        return TRUE;
    default:
        return TRUE;
    }
}

static boolean
nvfx_fragprog_parse_decl_output(struct nv30_context* nvfx, struct nvfx_fpc *fpc,
                                const struct tgsi_full_declaration *fdec)
{
    unsigned idx = fdec->Range.First;
    unsigned hw;

    switch (fdec->Semantic.Name) {
    case TGSI_SEMANTIC_POSITION:
        hw = 1;
        break;
    case TGSI_SEMANTIC_COLOR:
        hw = ~0;
        switch (fdec->Semantic.Index) {
        case 0: hw = 0; break;
        case 1: hw = 2; break;
        case 2: hw = 3; break;
        case 3: hw = 4; break;
        }
        if (hw > ((nvfx->use_nv4x) ? 4 : 2)) {
            NOUVEAU_ERR("bad rcol index\n");
            return FALSE;
        }
        break;
    default:
        NOUVEAU_ERR("bad output semantic\n");
        return FALSE;
    }

    fpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
    fpc->r_temps |= (1ULL << hw);
    return TRUE;
}

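/*
 * Two passes over the token stream: the first records inputs, outputs,
 * the highest temp index and the immediates; the second hands out the
 * remaining texcoord slots to GENERIC inputs with index >= 8, which is
 * only possible once the fixed mappings from the first pass are known.
 */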
static boolean
nvfx_fragprog_prepare(struct nv30_context* nvfx, struct nvfx_fpc *fpc)
{
    struct tgsi_parse_context p;
    int high_temp = -1, i;

    fpc->r_imm = CALLOC(fpc->fp->info.immediate_count, sizeof(struct nvfx_reg));

    tgsi_parse_init(&p, fpc->fp->pipe.tokens);
    while (!tgsi_parse_end_of_tokens(&p)) {
        const union tgsi_full_token *tok = &p.FullToken;

        tgsi_parse_token(&p);
        switch (tok->Token.Type) {
        case TGSI_TOKEN_TYPE_DECLARATION:
        {
            const struct tgsi_full_declaration *fdec;
            fdec = &p.FullToken.FullDeclaration;
            switch (fdec->Declaration.File) {
            case TGSI_FILE_INPUT:
                if (!nvfx_fragprog_parse_decl_input(nvfx, fpc, fdec))
                    goto out_err;
                break;
            case TGSI_FILE_OUTPUT:
                if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec))
                    goto out_err;
                break;
            case TGSI_FILE_TEMPORARY:
                if (fdec->Range.Last > high_temp)
                    high_temp = fdec->Range.Last;
                break;
            default:
                break;
            }
        }
        break;
        case TGSI_TOKEN_TYPE_IMMEDIATE:
        {
            struct tgsi_full_immediate *imm;

            imm = &p.FullToken.FullImmediate;
            assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
            assert(fpc->nr_imm < fpc->fp->info.immediate_count);

            fpc->r_imm[fpc->nr_imm++] = nvfx_fp_imm(fpc, imm->u[0].Float, imm->u[1].Float, imm->u[2].Float, imm->u[3].Float);
            break;
        }
        default:
            break;
        }
    }
    tgsi_parse_free(&p);

    tgsi_parse_init(&p, fpc->fp->pipe.tokens);
    while (!tgsi_parse_end_of_tokens(&p)) {
        const struct tgsi_full_declaration *fdec;
        tgsi_parse_token(&p);
        switch (p.FullToken.Token.Type) {
        case TGSI_TOKEN_TYPE_DECLARATION:
            fdec = &p.FullToken.FullDeclaration;
            switch (fdec->Declaration.File) {
            case TGSI_FILE_INPUT:
                if (!nvfx_fragprog_assign_generic(nvfx, fpc, fdec))
                    goto out_err;
                break;
            default:
                break;
            }
            break;
        default:
            break;
        }
    }
    tgsi_parse_free(&p);

    /* high_temp starts at -1, so this is the number of declared temps */
    if (++high_temp) {
        fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg));
        for (i = 0; i < high_temp; i++)
            fpc->r_temp[i] = temp(fpc);
        fpc->r_temps_discard = 0ULL;
    }

    return TRUE;

out_err:
    if (fpc->r_temp) {
        FREE(fpc->r_temp);
        fpc->r_temp = NULL;
    }
    tgsi_parse_free(&p);
    return FALSE;
}

DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", FALSE)

void
_nvfx_fragprog_translate(struct nv30_context *nvfx, struct nv30_fragprog *fp,
                         boolean emulate_sprite_flipping)
{
    struct tgsi_parse_context parse;
    struct nvfx_fpc *fpc = NULL;
    struct util_dynarray insns;

    fp->translated = FALSE;
    fp->point_sprite_control = 0;
    fp->vp_or = 0;

    fpc = CALLOC_STRUCT(nvfx_fpc);
    if (!fpc)
        goto out_err;

    fpc->max_temps = nvfx->use_nv4x ? 48 : 32;
    fpc->fp = fp;
    fpc->num_regs = 2;
    memset(fp->texcoord, 0xff, sizeof(fp->texcoord));

    for (unsigned i = 0; i < fp->info.num_properties; ++i) {
        switch (fp->info.properties[i].name) {
        case TGSI_PROPERTY_FS_COORD_ORIGIN:
            if (fp->info.properties[i].data[0])
                fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED;
            break;
        case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
            if (fp->info.properties[i].data[0])
                fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_CENTER_INTEGER;
            break;
        case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
            if (fp->info.properties[i].data[0])
                fp->rt_enable |= NV30_3D_RT_ENABLE_MRT;
            break;
        default:
            break;
        }
    }

    if (!nvfx_fragprog_prepare(nvfx, fpc))
        goto out_err;

    tgsi_parse_init(&parse, fp->pipe.tokens);
    util_dynarray_init(&insns);

    while (!tgsi_parse_end_of_tokens(&parse)) {
        tgsi_parse_token(&parse);

        switch (parse.FullToken.Token.Type) {
        case TGSI_TOKEN_TYPE_INSTRUCTION:
        {
            const struct tgsi_full_instruction *finst;

            util_dynarray_append(&insns, unsigned, fp->insn_len);
            finst = &parse.FullToken.FullInstruction;
            if (!nvfx_fragprog_parse_instruction(nvfx, fpc, finst))
                goto out_err;
        }
        break;
        default:
            break;
        }
    }
    util_dynarray_append(&insns, unsigned, fp->insn_len);

    for (unsigned i = 0; i < fpc->label_relocs.size; i += sizeof(struct nvfx_relocation)) {
        struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)fpc->label_relocs.data + i);
        fp->insn[label_reloc->location] |= ((unsigned*)insns.data)[label_reloc->target];
    }
    util_dynarray_fini(&insns);

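    /* nv3x packs the temp count as register pairs in the low bits of
     * FP_CONTROL, while nv4x has an explicit TEMP_COUNT field; this is an
     * inference from the register names, not documented behaviour. */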
    if (!nvfx->is_nv4x)
        fp->fp_control |= (fpc->num_regs - 1) / 2;
    else
        fp->fp_control |= fpc->num_regs << NV40_3D_FP_CONTROL_TEMP_COUNT__SHIFT;

    /* Terminate final instruction (bit 0 marks the end of the program) */
    if (fp->insn)
        fp->insn[fpc->inst_offset] |= 0x00000001;

    /* Append NOP + END instruction for branches to the end of the program */
    fpc->inst_offset = fp->insn_len;
    grow_insns(fpc, 4);
    fp->insn[fpc->inst_offset + 0] = 0x00000001;
    fp->insn[fpc->inst_offset + 1] = 0x00000000;
    fp->insn[fpc->inst_offset + 2] = 0x00000000;
    fp->insn[fpc->inst_offset + 3] = 0x00000000;

    if (debug_get_option_nvfx_dump_fp()) {
        debug_printf("\n");
        tgsi_dump(fp->pipe.tokens, 0);

        debug_printf("\n%s fragment program:\n", nvfx->is_nv4x ? "nv4x" : "nv3x");
        for (unsigned i = 0; i < fp->insn_len; i += 4)
            debug_printf("%3u: %08x %08x %08x %08x\n", i >> 2, fp->insn[i], fp->insn[i + 1], fp->insn[i + 2], fp->insn[i + 3]);
        debug_printf("\n");
    }

    fp->translated = TRUE;

out:
    tgsi_parse_free(&parse);
    if (fpc) {
        if (fpc->r_temp)
            FREE(fpc->r_temp);
        util_dynarray_fini(&fpc->if_stack);
        util_dynarray_fini(&fpc->label_relocs);
        util_dynarray_fini(&fpc->imm_data);
        //util_dynarray_fini(&fpc->loop_stack);
        FREE(fpc);
    }

    return;

out_err:
    _debug_printf("Error: failed to compile this fragment program:\n");
    tgsi_dump(fp->pipe.tokens, 0);
    goto out;
}

/* On big-endian hosts the fragment program dwords are stored with their
 * 16-bit halves swapped before upload. */
static inline void
nvfx_fp_memcpy(void* dst, const void* src, size_t len)
{
#ifndef PIPE_ARCH_BIG_ENDIAN
    memcpy(dst, src, len);
#else
    size_t i;
    for (i = 0; i < len; i += 4) {
        uint32_t v = *(uint32_t*)((char*)src + i);
        *(uint32_t*)((char*)dst + i) = (v >> 16) | (v << 16);
    }
#endif
}