Home | History | Annotate | Download | only in nv30

Lines Matching refs:fpc

45 temp(struct nvfx_fpc *fpc)
47 int idx = __builtin_ctzll(~fpc->r_temps);
49 if (idx >= fpc->max_temps) {
55 fpc->r_temps |= (1ULL << idx);
56 fpc->r_temps_discard |= (1ULL << idx);
61 release_temps(struct nvfx_fpc *fpc)
63 fpc->r_temps &= ~fpc->r_temps_discard;
64 fpc->r_temps_discard = 0ULL;
68 nvfx_fp_imm(struct nvfx_fpc *fpc, float a, float b, float c, float d)
71 int idx = fpc->imm_data.size >> 4;
73 memcpy(util_dynarray_grow(&fpc->imm_data, sizeof(float) * 4), v, 4 * sizeof(float));
78 grow_insns(struct nvfx_fpc *fpc, int size)
80 struct nv30_fragprog *fp = fpc->fp;
87 emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_src src)
89 struct nv30_fragprog *fp = fpc->fp;
90 uint32_t *hw = &fp->insn[fpc->inst_offset];
106 if (!fpc->have_const) {
107 grow_insns(fpc, 4);
108 hw = &fp->insn[fpc->inst_offset];
109 fpc->have_const = 1;
112 memcpy(&fp->insn[fpc->inst_offset + 4],
113 (float*)fpc->imm_data.data + src.reg.index * 4,
119 if (!fpc->have_const) {
120 grow_insns(fpc, 4);
121 hw = &fp->insn[fpc->inst_offset];
122 fpc->have_const = 1;
131 fpd->offset = fpc->inst_offset + 4;
160 emit_dst(struct nvfx_fpc *fpc, struct nvfx_reg dst)
162 struct nv30_fragprog *fp = fpc->fp;
163 uint32_t *hw = &fp->insn[fpc->inst_offset];
175 if (fpc->num_regs < (dst.index + 1))
176 fpc->num_regs = dst.index + 1;
189 nvfx_fp_emit(struct nvfx_fpc *fpc, struct nvfx_insn insn)
191 struct nv30_fragprog *fp = fpc->fp;
194 fpc->inst_offset = fp->insn_len;
195 fpc->have_const = 0;
196 grow_insns(fpc, 4);
197 hw = &fp->insn[fpc->inst_offset];
222 emit_dst(fpc, insn.dst);
223 emit_src(fpc, 0, insn.src[0]);
224 emit_src(fpc, 1, insn.src[1]);
225 emit_src(fpc, 2, insn.src[2]);
238 nv40_fp_if(struct nvfx_fpc *fpc, struct nvfx_src src)
244 nvfx_fp_emit(fpc, insn);
246 fpc->inst_offset = fpc->fp->insn_len;
247 grow_insns(fpc, 4);
248 hw = &fpc->fp->insn[fpc->inst_offset];
261 util_dynarray_append(&fpc->if_stack, unsigned, fpc->inst_offset);
266 nv40_fp_cal(struct nvfx_fpc *fpc, unsigned target)
270 fpc->inst_offset = fpc->fp->insn_len;
271 grow_insns(fpc, 4);
272 hw = &fpc->fp->insn[fpc->inst_offset];
281 reloc.location = fpc->inst_offset + 2;
282 util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
286 nv40_fp_ret(struct nvfx_fpc *fpc)
289 fpc->inst_offset = fpc->fp->insn_len;
290 grow_insns(fpc, 4);
291 hw = &fpc->fp->insn[fpc->inst_offset];
302 nv40_fp_rep(struct nvfx_fpc *fpc, unsigned count, unsigned target)
306 fpc->inst_offset = fpc->fp->insn_len;
307 grow_insns(fpc, 4);
308 hw = &fpc->fp->insn[fpc->inst_offset];
322 reloc.location = fpc->inst_offset + 3;
323 util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
324 //util_dynarray_append(&fpc->loop_stack, unsigned, target);
329 nv40_fp_bra(struct nvfx_fpc *fpc, unsigned target)
333 fpc->inst_offset = fpc->fp->insn_len;
334 grow_insns(fpc, 4);
335 hw = &fpc->fp->insn[fpc->inst_offset];
346 reloc.location = fpc->inst_offset + 2;
347 util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
349 reloc.location = fpc->inst_offset + 3;
350 util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc);
354 nv40_fp_brk(struct nvfx_fpc *fpc)
357 fpc->inst_offset = fpc->fp->insn_len;
358 grow_insns(fpc, 4);
359 hw = &fpc->fp->insn[fpc->inst_offset];
371 tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
377 src.reg = fpc->r_input[fsrc->Register.Index];
383 assert(fsrc->Register.Index < fpc->nr_imm);
384 src.reg = fpc->r_imm[fsrc->Register.Index];
387 src.reg = fpc->r_temp[fsrc->Register.Index];
391 src.reg = fpc->r_result[fsrc->Register.Index];
413 tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
416 return fpc->r_result[fdst->Register.Index];
418 return fpc->r_temp[fdst->Register.Index];
440 nvfx_fragprog_parse_instruction(struct nv30_context* nvfx, struct nvfx_fpc *fpc,
459 src[i] = tgsi_src(fpc, fsrc);
470 if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG && (0
477 struct nvfx_src addend = nvfx_src(nvfx_fp_imm(fpc, 0, 0, 0, 1));
482 src[i] = nvfx_src(temp(fpc));
483 nvfx_fp_emit(fpc, arith(0, ADD, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), addend, none));
486 src[i] = tgsi_src(fpc, fsrc);
488 src[i] = nvfx_src(temp(fpc));
489 nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
496 src[i] = tgsi_src(fpc, fsrc);
498 src[i] = nvfx_src(temp(fpc));
499 nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
506 src[i] = tgsi_src(fpc, fsrc);
508 src[i] = nvfx_src(temp(fpc));
509 nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none));
526 dst = tgsi_dst(fpc, &finst->Dst[0]);
532 nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, abs(src[0]), none, none));
535 nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none));
538 tmp = nvfx_src(temp(fpc));
539 nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, neg(src[0]), none, none));
540 nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, neg(tmp), none, none));
545 nvfx_fp_emit(fpc, insn);
549 nvfx_fp_emit(fpc, insn);
553 nvfx_fp_emit(fpc, insn);
556 nvfx_fp_emit(fpc, arith(sat, COS, dst, mask, src[0], none, none));
560 tmp = nvfx_src(temp(fpc));
561 nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
562 nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
563 nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
564 nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
566 nvfx_fp_emit(fpc, arith(sat, DDX, dst, mask, src[0], none, none));
571 tmp = nvfx_src(temp(fpc));
572 nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none));
573 nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none));
574 nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none));
575 nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none));
577 nvfx_fp_emit(fpc, arith(sat, DDY, dst, mask, src[0], none, none));
581 tmp = nvfx_src(temp(fpc));
582 nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], src[1], none));
583 nvfx_fp_emit(fpc, arith(0, ADD, dst, mask, swz(tmp, X, X, X, X), swz(tmp, Y, Y, Y, Y), none));
586 nvfx_fp_emit(fpc, arith(sat, DP3, dst, mask, src[0], src[1], none));
589 nvfx_fp_emit(fpc, arith(sat, DP4, dst, mask, src[0], src[1], none));
592 tmp = nvfx_src(temp(fpc));
593 nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[1], none));
594 nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, swz(tmp, X, X, X, X), swz(src[1], W, W, W, W), none));
597 nvfx_fp_emit(fpc, arith(sat, DST, dst, mask, src[0], src[1], none));
600 nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, src[0], none, none));
603 nvfx_fp_emit(fpc, arith(sat, FLR, dst, mask, src[0], none, none));
606 nvfx_fp_emit(fpc, arith(sat, FRC, dst, mask, src[0], none, none));
609 nvfx_fp_emit(fpc, arith(0, KIL, none.reg, 0, none, none, none));
614 nvfx_fp_emit(fpc, insn);
618 nvfx_fp_emit(fpc, insn);
621 nvfx_fp_emit(fpc, arith(sat, LG2, dst, mask, src[0], none, none));
625 nvfx_fp_emit(fpc, arith(sat, LIT_NV30, dst, mask, src[0], none, none));
632 struct nvfx_src maxs = nvfx_src(nvfx_fp_imm(fpc, 0, FLT_MIN, 0, 0));
633 tmp = nvfx_src(temp(fpc));
635 nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, maxs, none, none));
638 nvfx_fp_emit(fpc, arith(0, MAX, tmp.reg, NVFX_FP_MASK_Y | NVFX_FP_MASK_W, swz(src[0], X, X, X, Y), swz(maxs, X, X, Y, Y), none));
639 nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), none, none));
640 nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), swz(src[0], W, W, W, W), none));
641 nvfx_fp_emit(fpc, arith(sat, LITEX2_NV40, dst, mask, swz(tmp, Y, Y, W, W), none, none));
646 nvfx_fp_emit(fpc, arith(sat, LRP_NV30, dst, mask, src[0], src[1], src[2]));
648 tmp = nvfx_src(temp(fpc));
649 nvfx_fp_emit(fpc, arith(0, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2]));
650 nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], tmp));
654 nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], src[2]));
657 nvfx_fp_emit(fpc, arith(sat, MAX, dst, mask, src[0], src[1], none));
660 nvfx_fp_emit(fpc, arith(sat, MIN, dst, mask, src[0], src[1], none));
663 nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, src[0], none, none));
666 nvfx_fp_emit(fpc, arith(sat, MUL, dst, mask, src[0], src[1], none));
672 nvfx_fp_emit(fpc, arith(sat, POW_NV30, dst, mask, src[0], src[1], none));
674 tmp = nvfx_src(temp(fpc));
675 nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
676 nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none));
677 nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, swz(tmp, X, X, X, X), none, none));
681 nvfx_fp_emit(fpc, arith(sat, RCP, dst, mask, src[0], none, none));
685 nvfx_fp_emit(fpc, arith(0, RFL_NV30, dst, mask, src[0], src[1], none));
687 tmp = nvfx_src(temp(fpc));
688 nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[0], none));
689 nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_Y, src[0], src[1], none));
692 nvfx_fp_emit(fpc, insn);
693 nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])));
698 nvfx_fp_emit(fpc, arith(sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none));
700 tmp = nvfx_src(temp(fpc));
703 nvfx_fp_emit(fpc, insn);
704 nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none));
712 nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
714 nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
719 nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
721 nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
725 nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none));
728 nvfx_fp_emit(fpc, arith(sat, SFL, dst, mask, src[0], src[1], none));
731 nvfx_fp_emit(fpc, arith(sat, SGE, dst, mask, src[0], src[1], none));
734 nvfx_fp_emit(fpc, arith(sat, SGT, dst, mask, src[0], src[1], none));
737 nvfx_fp_emit(fpc, arith(sat, SIN, dst, mask, src[0], none, none));
740 nvfx_fp_emit(fpc, arith(sat, SLE, dst, mask, src[0], src[1], none));
743 nvfx_fp_emit(fpc, arith(sat, SLT, dst, mask, src[0], src[1], none));
746 nvfx_fp_emit(fpc, arith(sat, SNE, dst, mask, src[0], src[1], none));
750 struct nvfx_src minones = swz(nvfx_src(nvfx_fp_imm(fpc, -1, -1, -1, -1)), X, X, X, X);
754 nvfx_fp_emit(fpc, insn);
758 nvfx_fp_emit(fpc, insn);
763 nvfx_fp_emit(fpc, insn);
768 nvfx_fp_emit(fpc, arith(sat, STR, dst, mask, src[0], src[1], none));
771 nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], neg(src[1]), none));
774 nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
777 tmp = nvfx_src(temp(fpc));
780 nvfx_fp_emit(fpc, insn);
782 nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, abs(src[0]), none, none));
783 nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, tmp, none, none));
787 nvfx_fp_emit(fpc, insn);
790 nvfx_fp_emit(fpc, tex(sat, TXB, unit, dst, mask, src[0], none, none));
794 nvfx_fp_emit(fpc, tex(sat, TXL_NV40, unit, dst, mask, src[0], none, none));
796 nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
799 nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none));
802 tmp = nvfx_src(temp(fpc));
803 nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
804 nvfx_fp_emit(fpc, arith(sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
812 nv40_fp_if(fpc, src[0]);
820 assert(util_dynarray_contains(&fpc->if_stack, unsigned));
821 hw = &fpc->fp->insn[util_dynarray_top(&fpc->if_stack, unsigned)];
822 hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
831 assert(util_dynarray_contains(&fpc->if_stack, unsigned));
832 hw = &fpc->fp->insn[util_dynarray_pop(&fpc->if_stack, unsigned)];
834 hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
835 hw[3] = fpc->fp->insn_len;
843 nv40_fp_bra(fpc, finst->Label.Label);
854 nv40_fp_cal(fpc, finst->Label.Label);
860 nv40_fp_ret(fpc);
867 nv40_fp_rep(fpc, 255, finst->Label.Label);
876 nv40_fp_brk(fpc);
895 release_temps(fpc);
911 nvfx_fragprog_parse_decl_input(struct nv30_context *nvfx, struct nvfx_fpc *fpc,
934 fpc->fp->texcoord[fdec->Semantic.Index] = fdec->Semantic.Index;
935 fpc->fp->texcoords |= (1 << fdec->Semantic.Index);
936 fpc
944 fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
949 nvfx_fragprog_assign_generic(struct nv30_context *nvfx, struct nvfx_fpc *fpc,
960 if (fpc->fp->texcoord[hw] == 0xffff) {
961 fpc->fp->texcoord[hw] = fdec->Semantic.Index;
963 fpc->fp->texcoords |= (0x1 << hw);
964 fpc->fp->vp_or |= (0x00004000 << hw);
966 fpc->fp->vp_or |= (0x00001000 << (hw - 8));
969 fpc->fp->point_sprite_control |= (0x00000100 << hw);
971 fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
984 nvfx_fragprog_parse_decl_output(struct nv30_context* nvfx, struct nvfx_fpc *fpc,
1012 fpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
1013 fpc->r_temps |= (1ULL << hw);
1018 nvfx_fragprog_prepare(struct nv30_context* nvfx, struct nvfx_fpc *fpc)
1023 fpc->r_imm = CALLOC(fpc->fp->info.immediate_count, sizeof(struct nvfx_reg));
1025 tgsi_parse_init(&p, fpc->fp->pipe.tokens);
1037 if (!nvfx_fragprog_parse_decl_input(nvfx, fpc, fdec))
1041 if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec))
1061 assert(fpc->nr_imm < fpc->fp->info.immediate_count);
1063 fpc->r_imm[fpc->nr_imm++] = nvfx_fp_imm(fpc, imm->u[0].Float, imm->u[1].Float, imm->u[2].Float, imm->u[3].Float);
1072 tgsi_parse_init(&p, fpc->fp->pipe.tokens);
1081 if (!nvfx_fragprog_assign_generic(nvfx, fpc, fdec))
1095 fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg));
1097 fpc->r_temp[i] = temp(fpc);
1098 fpc->r_temps_discard = 0ULL;
1104 if (fpc->r_temp) {
1105 FREE(fpc->r_temp);
1106 fpc->r_temp = NULL;
1119 struct nvfx_fpc *fpc = NULL;
1126 fpc = CALLOC_STRUCT(nvfx_fpc);
1127 if (!fpc)
1130 fpc->max_temps = nvfx->use_nv4x ? 48 : 32;
1131 fpc->fp = fp;
1132 fpc->num_regs = 2;
1154 if (!nvfx_fragprog_prepare(nvfx, fpc))
1170 if (!nvfx_fragprog_parse_instruction(nvfx, fpc, finst))
1180 for(unsigned i = 0; i < fpc->label_relocs.size; i += sizeof(struct nvfx_relocation))
1182 struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)fpc->label_relocs.data + i);
1188 fp->fp_control |= (fpc->num_regs-1)/2;
1190 fp->fp_control |= fpc->num_regs << NV40_3D_FP_CONTROL_TEMP_COUNT__SHIFT;
1194 fp->insn[fpc->inst_offset] |= 0x00000001;
1197 fpc->inst_offset = fp->insn_len;
1198 grow_insns(fpc, 4);
1199 fp->insn[fpc->inst_offset + 0] = 0x00000001;
1200 fp->insn[fpc->inst_offset + 1] = 0x00000000;
1201 fp->insn[fpc->inst_offset + 2] = 0x00000000;
1202 fp->insn[fpc->inst_offset + 3] = 0x00000000;
1219 if(fpc)
1221 if (fpc->r_temp)
1222 FREE(fpc->r_temp);
1223 util_dynarray_fini(&fpc->if_stack);
1224 util_dynarray_fini(&fpc->label_relocs);
1225 util_dynarray_fini(&fpc->imm_data);
1226 //util_dynarray_fini(&fpc->loop_stack);
1227 FREE(fpc);