1 #include "pipe/p_context.h" 2 #include "pipe/p_defines.h" 3 #include "pipe/p_state.h" 4 #include "util/u_linkage.h" 5 #include "util/u_debug.h" 6 7 #include "pipe/p_shader_tokens.h" 8 #include "tgsi/tgsi_parse.h" 9 #include "tgsi/tgsi_dump.h" 10 #include "tgsi/tgsi_util.h" 11 #include "tgsi/tgsi_ureg.h" 12 13 #include "draw/draw_context.h" 14 15 #include "nv30-40_3d.xml.h" 16 #include "nv30_context.h" 17 #include "nv30_resource.h" 18 19 /* TODO (at least...): 20 * 1. Indexed consts + ARL 21 * 3. NV_vp11, NV_vp2, NV_vp3 features 22 * - extra arith opcodes 23 * - branching 24 * - texture sampling 25 * - indexed attribs 26 * - indexed results 27 * 4. bugs 28 */ 29 30 #include "nv30_vertprog.h" 31 #include "nv40_vertprog.h" 32 33 struct nvfx_loop_entry { 34 unsigned brk_target; 35 unsigned cont_target; 36 }; 37 38 struct nvfx_vpc { 39 struct nv30_context* nv30; 40 struct pipe_shader_state pipe; 41 struct nv30_vertprog *vp; 42 struct tgsi_shader_info* info; 43 44 struct nv30_vertprog_exec *vpi; 45 46 unsigned r_temps; 47 unsigned r_temps_discard; 48 struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS]; 49 struct nvfx_reg *r_address; 50 struct nvfx_reg *r_temp; 51 struct nvfx_reg *r_const; 52 struct nvfx_reg r_0_1; 53 54 struct nvfx_reg *imm; 55 unsigned nr_imm; 56 57 int hpos_idx; 58 int cvtx_idx; 59 60 struct util_dynarray label_relocs; 61 struct util_dynarray loop_stack; 62 }; 63 64 static struct nvfx_reg 65 temp(struct nvfx_vpc *vpc) 66 { 67 int idx = ffs(~vpc->r_temps) - 1; 68 69 if (idx < 0) { 70 NOUVEAU_ERR("out of temps!!\n"); 71 assert(0); 72 return nvfx_reg(NVFXSR_TEMP, 0); 73 } 74 75 vpc->r_temps |= (1 << idx); 76 vpc->r_temps_discard |= (1 << idx); 77 return nvfx_reg(NVFXSR_TEMP, idx); 78 } 79 80 static inline void 81 release_temps(struct nvfx_vpc *vpc) 82 { 83 vpc->r_temps &= ~vpc->r_temps_discard; 84 vpc->r_temps_discard = 0; 85 } 86 87 static struct nvfx_reg 88 constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w) 89 { 90 struct nv30_vertprog *vp = vpc->vp; 91 struct nv30_vertprog_data *vpd; 92 int idx; 93 94 if (pipe >= 0) { 95 for (idx = 0; idx < vp->nr_consts; idx++) { 96 if (vp->consts[idx].index == pipe) 97 return nvfx_reg(NVFXSR_CONST, idx); 98 } 99 } 100 101 idx = vp->nr_consts++; 102 vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); 103 vpd = &vp->consts[idx]; 104 105 vpd->index = pipe; 106 vpd->value[0] = x; 107 vpd->value[1] = y; 108 vpd->value[2] = z; 109 vpd->value[3] = w; 110 return nvfx_reg(NVFXSR_CONST, idx); 111 } 112 113 #define arith(s,t,o,d,m,s0,s1,s2) \ 114 nvfx_insn((s), (NVFX_VP_INST_SLOT_##t << 7) | NVFX_VP_INST_##t##_OP_##o, -1, (d), (m), (s0), (s1), (s2)) 115 116 static void 117 emit_src(struct nv30_context *nv30, struct nvfx_vpc *vpc, uint32_t *hw, 118 int pos, struct nvfx_src src) 119 { 120 struct nv30_vertprog *vp = vpc->vp; 121 uint32_t sr = 0; 122 struct nvfx_relocation reloc; 123 124 switch (src.reg.type) { 125 case NVFXSR_TEMP: 126 sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT)); 127 sr |= (src.reg.index << NVFX_VP(SRC_TEMP_SRC_SHIFT)); 128 break; 129 case NVFXSR_INPUT: 130 sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << 131 NVFX_VP(SRC_REG_TYPE_SHIFT)); 132 vp->ir |= (1 << src.reg.index); 133 hw[1] |= (src.reg.index << NVFX_VP(INST_INPUT_SRC_SHIFT)); 134 break; 135 case NVFXSR_CONST: 136 sr |= (NVFX_VP(SRC_REG_TYPE_CONST) << 137 NVFX_VP(SRC_REG_TYPE_SHIFT)); 138 if (src.reg.index < 256 && src.reg.index >= -256) { 139 reloc.location = vp->nr_insns - 1; 140 reloc.target = src.reg.index; 141 util_dynarray_append(&vp->const_relocs, struct nvfx_relocation, reloc); 142 } else { 143 hw[1] |= (src.reg.index << NVFX_VP(INST_CONST_SRC_SHIFT)) & 144 NVFX_VP(INST_CONST_SRC_MASK); 145 } 146 break; 147 case NVFXSR_NONE: 148 sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << 149 NVFX_VP(SRC_REG_TYPE_SHIFT)); 150 break; 151 default: 152 assert(0); 153 } 154 155 if (src.negate) 156 sr |= NVFX_VP(SRC_NEGATE); 157 158 if (src.abs) 159 hw[0] |= (1 << (21 + pos)); 160 161 sr |= ((src.swz[0] << NVFX_VP(SRC_SWZ_X_SHIFT)) | 162 (src.swz[1] << NVFX_VP(SRC_SWZ_Y_SHIFT)) | 163 (src.swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT)) | 164 (src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT))); 165 166 if(src.indirect) { 167 if(src.reg.type == NVFXSR_CONST) 168 hw[3] |= NVFX_VP(INST_INDEX_CONST); 169 else if(src.reg.type == NVFXSR_INPUT) 170 hw[0] |= NVFX_VP(INST_INDEX_INPUT); 171 else 172 assert(0); 173 174 if(src.indirect_reg) 175 hw[0] |= NVFX_VP(INST_ADDR_REG_SELECT_1); 176 hw[0] |= src.indirect_swz << NVFX_VP(INST_ADDR_SWZ_SHIFT); 177 } 178 179 switch (pos) { 180 case 0: 181 hw[1] |= ((sr & NVFX_VP(SRC0_HIGH_MASK)) >> 182 NVFX_VP(SRC0_HIGH_SHIFT)) << NVFX_VP(INST_SRC0H_SHIFT); 183 hw[2] |= (sr & NVFX_VP(SRC0_LOW_MASK)) << 184 NVFX_VP(INST_SRC0L_SHIFT); 185 break; 186 case 1: 187 hw[2] |= sr << NVFX_VP(INST_SRC1_SHIFT); 188 break; 189 case 2: 190 hw[2] |= ((sr & NVFX_VP(SRC2_HIGH_MASK)) >> 191 NVFX_VP(SRC2_HIGH_SHIFT)) << NVFX_VP(INST_SRC2H_SHIFT); 192 hw[3] |= (sr & NVFX_VP(SRC2_LOW_MASK)) << 193 NVFX_VP(INST_SRC2L_SHIFT); 194 break; 195 default: 196 assert(0); 197 } 198 } 199 200 static void 201 emit_dst(struct nv30_context *nv30, struct nvfx_vpc *vpc, uint32_t *hw, 202 int slot, struct nvfx_reg dst) 203 { 204 struct nv30_vertprog *vp = vpc->vp; 205 206 switch (dst.type) { 207 case NVFXSR_NONE: 208 if(!nv30->is_nv4x) 209 hw[0] |= NV30_VP_INST_DEST_TEMP_ID_MASK; 210 else { 211 hw[3] |= NV40_VP_INST_DEST_MASK; 212 if (slot == 0) 213 hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK; 214 else 215 hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; 216 } 217 break; 218 case NVFXSR_TEMP: 219 if(!nv30->is_nv4x) 220 hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); 221 else { 222 hw[3] |= NV40_VP_INST_DEST_MASK; 223 if (slot == 0) 224 hw[0] |= (dst.index << NV40_VP_INST_VEC_DEST_TEMP_SHIFT); 225 else 226 hw[3] |= (dst.index << NV40_VP_INST_SCA_DEST_TEMP_SHIFT); 227 } 228 break; 229 case NVFXSR_OUTPUT: 230 /* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */ 231 if(nv30->is_nv4x) { 232 switch (dst.index) { 233 case NV30_VP_INST_DEST_CLP(0): 234 dst.index = NVFX_VP(INST_DEST_FOGC); 235 vp->or |= (1 << 6); 236 break; 237 case NV30_VP_INST_DEST_CLP(1): 238 dst.index = NVFX_VP(INST_DEST_FOGC); 239 vp->or |= (1 << 7); 240 break; 241 case NV30_VP_INST_DEST_CLP(2): 242 dst.index = NVFX_VP(INST_DEST_FOGC); 243 vp->or |= (1 << 8); 244 break; 245 case NV30_VP_INST_DEST_CLP(3): 246 dst.index = NVFX_VP(INST_DEST_PSZ); 247 vp->or |= (1 << 9); 248 break; 249 case NV30_VP_INST_DEST_CLP(4): 250 dst.index = NVFX_VP(INST_DEST_PSZ); 251 vp->or |= (1 << 10); 252 break; 253 case NV30_VP_INST_DEST_CLP(5): 254 dst.index = NVFX_VP(INST_DEST_PSZ); 255 vp->or |= (1 << 11); 256 break; 257 case NV40_VP_INST_DEST_COL0: vp->or |= (1 << 0); break; 258 case NV40_VP_INST_DEST_COL1: vp->or |= (1 << 1); break; 259 case NV40_VP_INST_DEST_BFC0: vp->or |= (1 << 2); break; 260 case NV40_VP_INST_DEST_BFC1: vp->or |= (1 << 3); break; 261 case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; 262 case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; 263 } 264 } 265 266 if(!nv30->is_nv4x) { 267 hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); 268 hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK; 269 270 /*XXX: no way this is entirely correct, someone needs to 271 * figure out what exactly it is. 272 */ 273 hw[3] |= 0x800; 274 } else { 275 hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); 276 if (slot == 0) { 277 hw[0] |= NV40_VP_INST_VEC_RESULT; 278 hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK; 279 } else { 280 hw[3] |= NV40_VP_INST_SCA_RESULT; 281 hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; 282 } 283 } 284 break; 285 default: 286 assert(0); 287 } 288 } 289 290 static void 291 nvfx_vp_emit(struct nvfx_vpc *vpc, struct nvfx_insn insn) 292 { 293 struct nv30_context *nv30 = vpc->nv30; 294 struct nv30_vertprog *vp = vpc->vp; 295 unsigned slot = insn.op >> 7; 296 unsigned op = insn.op & 0x7f; 297 uint32_t *hw; 298 299 vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); 300 vpc->vpi = &vp->insns[vp->nr_insns - 1]; 301 memset(vpc->vpi, 0, sizeof(*vpc->vpi)); 302 303 hw = vpc->vpi->data; 304 305 if (insn.cc_test != NVFX_COND_TR) 306 hw[0] |= NVFX_VP(INST_COND_TEST_ENABLE); 307 hw[0] |= (insn.cc_test << NVFX_VP(INST_COND_SHIFT)); 308 hw[0] |= ((insn.cc_swz[0] << NVFX_VP(INST_COND_SWZ_X_SHIFT)) | 309 (insn.cc_swz[1] << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) | 310 (insn.cc_swz[2] << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) | 311 (insn.cc_swz[3] << NVFX_VP(INST_COND_SWZ_W_SHIFT))); 312 if(insn.cc_update) 313 hw[0] |= NVFX_VP(INST_COND_UPDATE_ENABLE); 314 315 if(insn.sat) { 316 assert(nv30->use_nv4x); 317 if(nv30->use_nv4x) 318 hw[0] |= NV40_VP_INST_SATURATE; 319 } 320 321 if(!nv30->is_nv4x) { 322 if(slot == 0) 323 hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); 324 else { 325 hw[0] |= ((op >> 4) << NV30_VP_INST_SCA_OPCODEH_SHIFT); 326 hw[1] |= ((op & 0xf) << NV30_VP_INST_SCA_OPCODEL_SHIFT); 327 } 328 // hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK); 329 // hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT)); 330 331 if (insn.dst.type == NVFXSR_OUTPUT) { 332 if (slot) 333 hw[3] |= (insn.mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); 334 else 335 hw[3] |= (insn.mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); 336 } else { 337 if (slot) 338 hw[3] |= (insn.mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); 339 else 340 hw[3] |= (insn.mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); 341 } 342 } else { 343 if (slot == 0) { 344 hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); 345 hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; 346 hw[3] |= (insn.mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); 347 } else { 348 hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); 349 hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK ; 350 hw[3] |= (insn.mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); 351 } 352 } 353 354 emit_dst(nv30, vpc, hw, slot, insn.dst); 355 emit_src(nv30, vpc, hw, 0, insn.src[0]); 356 emit_src(nv30, vpc, hw, 1, insn.src[1]); 357 emit_src(nv30, vpc, hw, 2, insn.src[2]); 358 359 // if(insn.src[0].indirect || op == NVFX_VP_INST_VEC_OP_ARL) 360 // hw[3] |= NV40_VP_INST_SCA_RESULT; 361 } 362 363 static inline struct nvfx_src 364 tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { 365 struct nvfx_src src; 366 367 switch (fsrc->Register.File) { 368 case TGSI_FILE_INPUT: 369 src.reg = nvfx_reg(NVFXSR_INPUT, fsrc->Register.Index); 370 break; 371 case TGSI_FILE_CONSTANT: 372 if(fsrc->Register.Indirect) { 373 src.reg = vpc->r_const[0]; 374 src.reg.index = fsrc->Register.Index; 375 } else { 376 src.reg = vpc->r_const[fsrc->Register.Index]; 377 } 378 break; 379 case TGSI_FILE_IMMEDIATE: 380 src.reg = vpc->imm[fsrc->Register.Index]; 381 break; 382 case TGSI_FILE_TEMPORARY: 383 src.reg = vpc->r_temp[fsrc->Register.Index]; 384 break; 385 default: 386 NOUVEAU_ERR("bad src file\n"); 387 src.reg.index = 0; 388 src.reg.type = -1; 389 break; 390 } 391 392 src.abs = fsrc->Register.Absolute; 393 src.negate = fsrc->Register.Negate; 394 src.swz[0] = fsrc->Register.SwizzleX; 395 src.swz[1] = fsrc->Register.SwizzleY; 396 src.swz[2] = fsrc->Register.SwizzleZ; 397 src.swz[3] = fsrc->Register.SwizzleW; 398 src.indirect = 0; 399 src.indirect_reg = 0; 400 src.indirect_swz = 0; 401 402 if(fsrc->Register.Indirect) { 403 if(fsrc->Indirect.File == TGSI_FILE_ADDRESS && 404 (fsrc->Register.File == TGSI_FILE_CONSTANT || 405 fsrc->Register.File == TGSI_FILE_INPUT)) { 406 src.indirect = 1; 407 src.indirect_reg = fsrc->Indirect.Index; 408 src.indirect_swz = fsrc->Indirect.SwizzleX; 409 } else { 410 src.reg.index = 0; 411 src.reg.type = -1; 412 } 413 } 414 415 return src; 416 } 417 418 static INLINE struct nvfx_reg 419 tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) { 420 struct nvfx_reg dst; 421 422 switch (fdst->Register.File) { 423 case TGSI_FILE_NULL: 424 dst = nvfx_reg(NVFXSR_NONE, 0); 425 break; 426 case TGSI_FILE_OUTPUT: 427 dst = vpc->r_result[fdst->Register.Index]; 428 break; 429 case TGSI_FILE_TEMPORARY: 430 dst = vpc->r_temp[fdst->Register.Index]; 431 break; 432 case TGSI_FILE_ADDRESS: 433 dst = vpc->r_address[fdst->Register.Index]; 434 break; 435 default: 436 NOUVEAU_ERR("bad dst file %i\n", fdst->Register.File); 437 dst.index = 0; 438 dst.type = 0; 439 break; 440 } 441 442 return dst; 443 } 444 445 static inline int 446 tgsi_mask(uint tgsi) 447 { 448 int mask = 0; 449 450 if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_VP_MASK_X; 451 if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_VP_MASK_Y; 452 if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_VP_MASK_Z; 453 if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_VP_MASK_W; 454 return mask; 455 } 456 457 static boolean 458 nvfx_vertprog_parse_instruction(struct nv30_context *nv30, struct nvfx_vpc *vpc, 459 unsigned idx, const struct tgsi_full_instruction *finst) 460 { 461 struct nvfx_src src[3], tmp; 462 struct nvfx_reg dst; 463 struct nvfx_reg final_dst; 464 struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); 465 struct nvfx_insn insn; 466 struct nvfx_relocation reloc; 467 struct nvfx_loop_entry loop; 468 boolean sat = FALSE; 469 int mask; 470 int ai = -1, ci = -1, ii = -1; 471 int i; 472 unsigned sub_depth = 0; 473 474 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { 475 const struct tgsi_full_src_register *fsrc; 476 477 fsrc = &finst->Src[i]; 478 if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { 479 src[i] = tgsi_src(vpc, fsrc); 480 } 481 } 482 483 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { 484 const struct tgsi_full_src_register *fsrc; 485 486 fsrc = &finst->Src[i]; 487 488 switch (fsrc->Register.File) { 489 case TGSI_FILE_INPUT: 490 if (ai == -1 || ai == fsrc->Register.Index) { 491 ai = fsrc->Register.Index; 492 src[i] = tgsi_src(vpc, fsrc); 493 } else { 494 src[i] = nvfx_src(temp(vpc)); 495 nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, 496 tgsi_src(vpc, fsrc), none, none)); 497 } 498 break; 499 case TGSI_FILE_CONSTANT: 500 if ((ci == -1 && ii == -1) || 501 ci == fsrc->Register.Index) { 502 ci = fsrc->Register.Index; 503 src[i] = tgsi_src(vpc, fsrc); 504 } else { 505 src[i] = nvfx_src(temp(vpc)); 506 nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, 507 tgsi_src(vpc, fsrc), none, none)); 508 } 509 break; 510 case TGSI_FILE_IMMEDIATE: 511 if ((ci == -1 && ii == -1) || 512 ii == fsrc->Register.Index) { 513 ii = fsrc->Register.Index; 514 src[i] = tgsi_src(vpc, fsrc); 515 } else { 516 src[i] = nvfx_src(temp(vpc)); 517 nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, 518 tgsi_src(vpc, fsrc), none, none)); 519 } 520 break; 521 case TGSI_FILE_TEMPORARY: 522 /* handled above */ 523 break; 524 default: 525 NOUVEAU_ERR("bad src file\n"); 526 return FALSE; 527 } 528 } 529 530 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { 531 if(src[i].reg.type < 0) 532 return FALSE; 533 } 534 535 if(finst->Dst[0].Register.File == TGSI_FILE_ADDRESS && 536 finst->Instruction.Opcode != TGSI_OPCODE_ARL) 537 return FALSE; 538 539 final_dst = dst = tgsi_dst(vpc, &finst->Dst[0]); 540 mask = tgsi_mask(finst->Dst[0].Register.WriteMask); 541 if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) { 542 assert(finst->Instruction.Opcode != TGSI_OPCODE_ARL); 543 if (nv30->use_nv4x) 544 sat = TRUE; 545 else 546 if(dst.type != NVFXSR_TEMP) 547 dst = temp(vpc); 548 } 549 550 switch (finst->Instruction.Opcode) { 551 case TGSI_OPCODE_ABS: 552 nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, abs(src[0]), none, none)); 553 break; 554 case TGSI_OPCODE_ADD: 555 nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, src[1])); 556 break; 557 case TGSI_OPCODE_ARL: 558 nvfx_vp_emit(vpc, arith(0, VEC, ARL, dst, mask, src[0], none, none)); 559 break; 560 case TGSI_OPCODE_CEIL: 561 tmp = nvfx_src(temp(vpc)); 562 nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, neg(src[0]), none, none)); 563 nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none)); 564 break; 565 case TGSI_OPCODE_CMP: 566 insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none); 567 insn.cc_update = 1; 568 nvfx_vp_emit(vpc, insn); 569 570 insn = arith(sat, VEC, MOV, dst, mask, src[2], none, none); 571 insn.cc_test = NVFX_COND_GE; 572 nvfx_vp_emit(vpc, insn); 573 574 insn = arith(sat, VEC, MOV, dst, mask, src[1], none, none); 575 insn.cc_test = NVFX_COND_LT; 576 nvfx_vp_emit(vpc, insn); 577 break; 578 case TGSI_OPCODE_COS: 579 nvfx_vp_emit(vpc, arith(sat, SCA, COS, dst, mask, none, none, src[0])); 580 break; 581 case TGSI_OPCODE_DP2: 582 tmp = nvfx_src(temp(vpc)); 583 nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, NVFX_VP_MASK_X | NVFX_VP_MASK_Y, src[0], src[1], none)); 584 nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, swz(tmp, X, X, X, X), none, swz(tmp, Y, Y, Y, Y))); 585 break; 586 case TGSI_OPCODE_DP3: 587 nvfx_vp_emit(vpc, arith(sat, VEC, DP3, dst, mask, src[0], src[1], none)); 588 break; 589 case TGSI_OPCODE_DP4: 590 nvfx_vp_emit(vpc, arith(sat, VEC, DP4, dst, mask, src[0], src[1], none)); 591 break; 592 case TGSI_OPCODE_DPH: 593 nvfx_vp_emit(vpc, arith(sat, VEC, DPH, dst, mask, src[0], src[1], none)); 594 break; 595 case TGSI_OPCODE_DST: 596 nvfx_vp_emit(vpc, arith(sat, VEC, DST, dst, mask, src[0], src[1], none)); 597 break; 598 case TGSI_OPCODE_EX2: 599 nvfx_vp_emit(vpc, arith(sat, SCA, EX2, dst, mask, none, none, src[0])); 600 break; 601 case TGSI_OPCODE_EXP: 602 nvfx_vp_emit(vpc, arith(sat, SCA, EXP, dst, mask, none, none, src[0])); 603 break; 604 case TGSI_OPCODE_FLR: 605 nvfx_vp_emit(vpc, arith(sat, VEC, FLR, dst, mask, src[0], none, none)); 606 break; 607 case TGSI_OPCODE_FRC: 608 nvfx_vp_emit(vpc, arith(sat, VEC, FRC, dst, mask, src[0], none, none)); 609 break; 610 case TGSI_OPCODE_LG2: 611 nvfx_vp_emit(vpc, arith(sat, SCA, LG2, dst, mask, none, none, src[0])); 612 break; 613 case TGSI_OPCODE_LIT: 614 nvfx_vp_emit(vpc, arith(sat, SCA, LIT, dst, mask, none, none, src[0])); 615 break; 616 case TGSI_OPCODE_LOG: 617 nvfx_vp_emit(vpc, arith(sat, SCA, LOG, dst, mask, none, none, src[0])); 618 break; 619 case TGSI_OPCODE_LRP: 620 tmp = nvfx_src(temp(vpc)); 621 nvfx_vp_emit(vpc, arith(0, VEC, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2])); 622 nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, mask, src[0], src[1], tmp)); 623 break; 624 case TGSI_OPCODE_MAD: 625 nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, mask, src[0], src[1], src[2])); 626 break; 627 case TGSI_OPCODE_MAX: 628 nvfx_vp_emit(vpc, arith(sat, VEC, MAX, dst, mask, src[0], src[1], none)); 629 break; 630 case TGSI_OPCODE_MIN: 631 nvfx_vp_emit(vpc, arith(sat, VEC, MIN, dst, mask, src[0], src[1], none)); 632 break; 633 case TGSI_OPCODE_MOV: 634 nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, src[0], none, none)); 635 break; 636 case TGSI_OPCODE_MUL: 637 nvfx_vp_emit(vpc, arith(sat, VEC, MUL, dst, mask, src[0], src[1], none)); 638 break; 639 case TGSI_OPCODE_NOP: 640 break; 641 case TGSI_OPCODE_POW: 642 tmp = nvfx_src(temp(vpc)); 643 nvfx_vp_emit(vpc, arith(0, SCA, LG2, tmp.reg, NVFX_VP_MASK_X, none, none, swz(src[0], X, X, X, X))); 644 nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, NVFX_VP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none)); 645 nvfx_vp_emit(vpc, arith(sat, SCA, EX2, dst, mask, none, none, swz(tmp, X, X, X, X))); 646 break; 647 case TGSI_OPCODE_RCP: 648 nvfx_vp_emit(vpc, arith(sat, SCA, RCP, dst, mask, none, none, src[0])); 649 break; 650 case TGSI_OPCODE_RSQ: 651 nvfx_vp_emit(vpc, arith(sat, SCA, RSQ, dst, mask, none, none, abs(src[0]))); 652 break; 653 case TGSI_OPCODE_SEQ: 654 nvfx_vp_emit(vpc, arith(sat, VEC, SEQ, dst, mask, src[0], src[1], none)); 655 break; 656 case TGSI_OPCODE_SFL: 657 nvfx_vp_emit(vpc, arith(sat, VEC, SFL, dst, mask, src[0], src[1], none)); 658 break; 659 case TGSI_OPCODE_SGE: 660 nvfx_vp_emit(vpc, arith(sat, VEC, SGE, dst, mask, src[0], src[1], none)); 661 break; 662 case TGSI_OPCODE_SGT: 663 nvfx_vp_emit(vpc, arith(sat, VEC, SGT, dst, mask, src[0], src[1], none)); 664 break; 665 case TGSI_OPCODE_SIN: 666 nvfx_vp_emit(vpc, arith(sat, SCA, SIN, dst, mask, none, none, src[0])); 667 break; 668 case TGSI_OPCODE_SLE: 669 nvfx_vp_emit(vpc, arith(sat, VEC, SLE, dst, mask, src[0], src[1], none)); 670 break; 671 case TGSI_OPCODE_SLT: 672 nvfx_vp_emit(vpc, arith(sat, VEC, SLT, dst, mask, src[0], src[1], none)); 673 break; 674 case TGSI_OPCODE_SNE: 675 nvfx_vp_emit(vpc, arith(sat, VEC, SNE, dst, mask, src[0], src[1], none)); 676 break; 677 case TGSI_OPCODE_SSG: 678 nvfx_vp_emit(vpc, arith(sat, VEC, SSG, dst, mask, src[0], none, none)); 679 break; 680 case TGSI_OPCODE_STR: 681 nvfx_vp_emit(vpc, arith(sat, VEC, STR, dst, mask, src[0], src[1], none)); 682 break; 683 case TGSI_OPCODE_SUB: 684 nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, neg(src[1]))); 685 break; 686 case TGSI_OPCODE_TRUNC: 687 tmp = nvfx_src(temp(vpc)); 688 insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none); 689 insn.cc_update = 1; 690 nvfx_vp_emit(vpc, insn); 691 692 nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, abs(src[0]), none, none)); 693 nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, tmp, none, none)); 694 695 insn = arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none); 696 insn.cc_test = NVFX_COND_LT; 697 nvfx_vp_emit(vpc, insn); 698 break; 699 case TGSI_OPCODE_XPD: 700 tmp = nvfx_src(temp(vpc)); 701 nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none)); 702 nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp))); 703 break; 704 case TGSI_OPCODE_IF: 705 insn = arith(0, VEC, MOV, none.reg, NVFX_VP_MASK_X, src[0], none, none); 706 insn.cc_update = 1; 707 nvfx_vp_emit(vpc, insn); 708 709 reloc.location = vpc->vp->nr_insns; 710 reloc.target = finst->Label.Label + 1; 711 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 712 713 insn = arith(0, SCA, BRA, none.reg, 0, none, none, none); 714 insn.cc_test = NVFX_COND_EQ; 715 insn.cc_swz[0] = insn.cc_swz[1] = insn.cc_swz[2] = insn.cc_swz[3] = 0; 716 nvfx_vp_emit(vpc, insn); 717 break; 718 case TGSI_OPCODE_ELSE: 719 case TGSI_OPCODE_BRA: 720 case TGSI_OPCODE_CAL: 721 reloc.location = vpc->vp->nr_insns; 722 reloc.target = finst->Label.Label; 723 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 724 725 if(finst->Instruction.Opcode == TGSI_OPCODE_CAL) 726 insn = arith(0, SCA, CAL, none.reg, 0, none, none, none); 727 else 728 insn = arith(0, SCA, BRA, none.reg, 0, none, none, none); 729 nvfx_vp_emit(vpc, insn); 730 break; 731 case TGSI_OPCODE_RET: 732 if(sub_depth || !vpc->vp->enabled_ucps) { 733 tmp = none; 734 tmp.swz[0] = tmp.swz[1] = tmp.swz[2] = tmp.swz[3] = 0; 735 nvfx_vp_emit(vpc, arith(0, SCA, RET, none.reg, 0, none, none, tmp)); 736 } else { 737 reloc.location = vpc->vp->nr_insns; 738 reloc.target = vpc->info->num_instructions; 739 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 740 nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); 741 } 742 break; 743 case TGSI_OPCODE_BGNSUB: 744 ++sub_depth; 745 break; 746 case TGSI_OPCODE_ENDSUB: 747 --sub_depth; 748 break; 749 case TGSI_OPCODE_ENDIF: 750 /* nothing to do here */ 751 break; 752 case TGSI_OPCODE_BGNLOOP: 753 loop.cont_target = idx; 754 loop.brk_target = finst->Label.Label + 1; 755 util_dynarray_append(&vpc->loop_stack, struct nvfx_loop_entry, loop); 756 break; 757 case TGSI_OPCODE_ENDLOOP: 758 loop = util_dynarray_pop(&vpc->loop_stack, struct nvfx_loop_entry); 759 760 reloc.location = vpc->vp->nr_insns; 761 reloc.target = loop.cont_target; 762 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 763 764 nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); 765 break; 766 case TGSI_OPCODE_CONT: 767 loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry); 768 769 reloc.location = vpc->vp->nr_insns; 770 reloc.target = loop.cont_target; 771 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 772 773 nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); 774 break; 775 case TGSI_OPCODE_BRK: 776 loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry); 777 778 reloc.location = vpc->vp->nr_insns; 779 reloc.target = loop.brk_target; 780 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 781 782 nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); 783 break; 784 case TGSI_OPCODE_END: 785 assert(!sub_depth); 786 if(vpc->vp->enabled_ucps) { 787 if(idx != (vpc->info->num_instructions - 1)) { 788 reloc.location = vpc->vp->nr_insns; 789 reloc.target = vpc->info->num_instructions; 790 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 791 nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); 792 } 793 } else { 794 if(vpc->vp->nr_insns) 795 vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; 796 nvfx_vp_emit(vpc, arith(0, VEC, NOP, none.reg, 0, none, none, none)); 797 vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; 798 } 799 break; 800 default: 801 NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); 802 return FALSE; 803 } 804 805 if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE && !nv30->use_nv4x) { 806 if (!vpc->r_0_1.type) 807 vpc->r_0_1 = constant(vpc, -1, 0, 1, 0, 0); 808 nvfx_vp_emit(vpc, arith(0, VEC, MAX, dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), X, X, X, X), none)); 809 nvfx_vp_emit(vpc, arith(0, VEC, MIN, final_dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), Y, Y, Y, Y), none)); 810 } 811 812 release_temps(vpc); 813 return TRUE; 814 } 815 816 static boolean 817 nvfx_vertprog_parse_decl_output(struct nv30_context *nv30, struct nvfx_vpc *vpc, 818 const struct tgsi_full_declaration *fdec) 819 { 820 unsigned num_texcoords = nv30->is_nv4x ? 10 : 8; 821 unsigned idx = fdec->Range.First; 822 int hw = 0, i; 823 824 switch (fdec->Semantic.Name) { 825 case TGSI_SEMANTIC_POSITION: 826 hw = NVFX_VP(INST_DEST_POS); 827 vpc->hpos_idx = idx; 828 break; 829 case TGSI_SEMANTIC_CLIPVERTEX: 830 vpc->r_result[idx] = temp(vpc); 831 vpc->r_temps_discard = 0; 832 vpc->cvtx_idx = idx; 833 return TRUE; 834 case TGSI_SEMANTIC_COLOR: 835 if (fdec->Semantic.Index == 0) { 836 hw = NVFX_VP(INST_DEST_COL0); 837 } else 838 if (fdec->Semantic.Index == 1) { 839 hw = NVFX_VP(INST_DEST_COL1); 840 } else { 841 NOUVEAU_ERR("bad colour semantic index\n"); 842 return FALSE; 843 } 844 break; 845 case TGSI_SEMANTIC_BCOLOR: 846 if (fdec->Semantic.Index == 0) { 847 hw = NVFX_VP(INST_DEST_BFC0); 848 } else 849 if (fdec->Semantic.Index == 1) { 850 hw = NVFX_VP(INST_DEST_BFC1); 851 } else { 852 NOUVEAU_ERR("bad bcolour semantic index\n"); 853 return FALSE; 854 } 855 break; 856 case TGSI_SEMANTIC_FOG: 857 hw = NVFX_VP(INST_DEST_FOGC); 858 break; 859 case TGSI_SEMANTIC_PSIZE: 860 hw = NVFX_VP(INST_DEST_PSZ); 861 break; 862 case TGSI_SEMANTIC_GENERIC: 863 for (i = 0; i < num_texcoords; i++) { 864 if (vpc->vp->texcoord[i] == fdec->Semantic.Index) { 865 hw = NVFX_VP(INST_DEST_TC(i)); 866 break; 867 } 868 } 869 870 if (i == num_texcoords) { 871 vpc->r_result[idx] = nvfx_reg(NVFXSR_NONE, 0); 872 return TRUE; 873 } 874 break; 875 case TGSI_SEMANTIC_EDGEFLAG: 876 /* not really an error just a fallback */ 877 NOUVEAU_ERR("cannot handle edgeflag output\n"); 878 return FALSE; 879 default: 880 NOUVEAU_ERR("bad output semantic\n"); 881 return FALSE; 882 } 883 884 vpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw); 885 return TRUE; 886 } 887 888 static boolean 889 nvfx_vertprog_prepare(struct nv30_context *nv30, struct nvfx_vpc *vpc) 890 { 891 struct tgsi_parse_context p; 892 int high_const = -1, high_temp = -1, high_addr = -1, nr_imm = 0, i; 893 894 tgsi_parse_init(&p, vpc->pipe.tokens); 895 while (!tgsi_parse_end_of_tokens(&p)) { 896 const union tgsi_full_token *tok = &p.FullToken; 897 898 tgsi_parse_token(&p); 899 switch(tok->Token.Type) { 900 case TGSI_TOKEN_TYPE_IMMEDIATE: 901 nr_imm++; 902 break; 903 case TGSI_TOKEN_TYPE_DECLARATION: 904 { 905 const struct tgsi_full_declaration *fdec; 906 907 fdec = &p.FullToken.FullDeclaration; 908 switch (fdec->Declaration.File) { 909 case TGSI_FILE_TEMPORARY: 910 if (fdec->Range.Last > high_temp) { 911 high_temp = 912 fdec->Range.Last; 913 } 914 break; 915 case TGSI_FILE_ADDRESS: 916 if (fdec->Range.Last > high_addr) { 917 high_addr = 918 fdec->Range.Last; 919 } 920 break; 921 case TGSI_FILE_CONSTANT: 922 if (fdec->Range.Last > high_const) { 923 high_const = 924 fdec->Range.Last; 925 } 926 break; 927 case TGSI_FILE_OUTPUT: 928 if (!nvfx_vertprog_parse_decl_output(nv30, vpc, fdec)) 929 return FALSE; 930 break; 931 default: 932 break; 933 } 934 } 935 break; 936 default: 937 break; 938 } 939 } 940 tgsi_parse_free(&p); 941 942 if (nr_imm) { 943 vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_reg)); 944 assert(vpc->imm); 945 } 946 947 if (++high_temp) { 948 vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg)); 949 for (i = 0; i < high_temp; i++) 950 vpc->r_temp[i] = temp(vpc); 951 } 952 953 if (++high_addr) { 954 vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_reg)); 955 for (i = 0; i < high_addr; i++) 956 vpc->r_address[i] = nvfx_reg(NVFXSR_TEMP, i); 957 } 958 959 if(++high_const) { 960 vpc->r_const = CALLOC(high_const, sizeof(struct nvfx_reg)); 961 for (i = 0; i < high_const; i++) 962 vpc->r_const[i] = constant(vpc, i, 0, 0, 0, 0); 963 } 964 965 vpc->r_temps_discard = 0; 966 return TRUE; 967 } 968 969 DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE) 970 971 boolean 972 _nvfx_vertprog_translate(struct nv30_context *nv30, struct nv30_vertprog *vp) 973 { 974 struct tgsi_parse_context parse; 975 struct nvfx_vpc *vpc = NULL; 976 struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); 977 struct util_dynarray insns; 978 int i, ucps; 979 980 vp->translated = FALSE; 981 vp->nr_insns = 0; 982 vp->nr_consts = 0; 983 984 vpc = CALLOC_STRUCT(nvfx_vpc); 985 if (!vpc) 986 return FALSE; 987 vpc->nv30 = nv30; 988 vpc->vp = vp; 989 vpc->pipe = vp->pipe; 990 vpc->info = &vp->info; 991 vpc->cvtx_idx = -1; 992 993 if (!nvfx_vertprog_prepare(nv30, vpc)) { 994 FREE(vpc); 995 return FALSE; 996 } 997 998 /* Redirect post-transform vertex position to a temp if user clip 999 * planes are enabled. We need to append code to the vtxprog 1000 * to handle clip planes later. 1001 */ 1002 if (vp->enabled_ucps && vpc->cvtx_idx < 0) { 1003 vpc->r_result[vpc->hpos_idx] = temp(vpc); 1004 vpc->r_temps_discard = 0; 1005 vpc->cvtx_idx = vpc->hpos_idx; 1006 } 1007 1008 util_dynarray_init(&insns); 1009 1010 tgsi_parse_init(&parse, vp->pipe.tokens); 1011 while (!tgsi_parse_end_of_tokens(&parse)) { 1012 tgsi_parse_token(&parse); 1013 1014 switch (parse.FullToken.Token.Type) { 1015 case TGSI_TOKEN_TYPE_IMMEDIATE: 1016 { 1017 const struct tgsi_full_immediate *imm; 1018 1019 imm = &parse.FullToken.FullImmediate; 1020 assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); 1021 assert(imm->Immediate.NrTokens == 4 + 1); 1022 vpc->imm[vpc->nr_imm++] = 1023 constant(vpc, -1, 1024 imm->u[0].Float, 1025 imm->u[1].Float, 1026 imm->u[2].Float, 1027 imm->u[3].Float); 1028 } 1029 break; 1030 case TGSI_TOKEN_TYPE_INSTRUCTION: 1031 { 1032 const struct tgsi_full_instruction *finst; 1033 unsigned idx = insns.size >> 2; 1034 util_dynarray_append(&insns, unsigned, vp->nr_insns); 1035 finst = &parse.FullToken.FullInstruction; 1036 if (!nvfx_vertprog_parse_instruction(nv30, vpc, idx, finst)) 1037 goto out; 1038 } 1039 break; 1040 default: 1041 break; 1042 } 1043 } 1044 1045 util_dynarray_append(&insns, unsigned, vp->nr_insns); 1046 1047 for(unsigned i = 0; i < vpc->label_relocs.size; i += sizeof(struct nvfx_relocation)) 1048 { 1049 struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)vpc->label_relocs.data + i); 1050 struct nvfx_relocation hw_reloc; 1051 1052 hw_reloc.location = label_reloc->location; 1053 hw_reloc.target = ((unsigned*)insns.data)[label_reloc->target]; 1054 1055 //debug_printf("hw %u -> tgsi %u = hw %u\n", hw_reloc.location, label_reloc->target, hw_reloc.target); 1056 1057 util_dynarray_append(&vp->branch_relocs, struct nvfx_relocation, hw_reloc); 1058 } 1059 util_dynarray_fini(&insns); 1060 util_dynarray_trim(&vp->branch_relocs); 1061 1062 /* XXX: what if we add a RET before?! make sure we jump here...*/ 1063 1064 /* Write out HPOS if it was redirected to a temp earlier */ 1065 if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) { 1066 struct nvfx_reg hpos = nvfx_reg(NVFXSR_OUTPUT, 1067 NVFX_VP(INST_DEST_POS)); 1068 struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]); 1069 1070 nvfx_vp_emit(vpc, arith(0, VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none)); 1071 } 1072 1073 /* Insert code to handle user clip planes */ 1074 ucps = vp->enabled_ucps; 1075 while (ucps) { 1076 int i = ffs(ucps) - 1; ucps &= ~(1 << i); 1077 struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT, NV30_VP_INST_DEST_CLP(i)); 1078 struct nvfx_src ceqn = nvfx_src(nvfx_reg(NVFXSR_CONST, 512 + i)); 1079 struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->cvtx_idx]); 1080 unsigned mask; 1081 1082 if(nv30->is_nv4x) 1083 { 1084 switch (i) { 1085 case 0: case 3: mask = NVFX_VP_MASK_Y; break; 1086 case 1: case 4: mask = NVFX_VP_MASK_Z; break; 1087 case 2: case 5: mask = NVFX_VP_MASK_W; break; 1088 default: 1089 NOUVEAU_ERR("invalid clip dist #%d\n", i); 1090 goto out; 1091 } 1092 } 1093 else 1094 mask = NVFX_VP_MASK_X; 1095 1096 nvfx_vp_emit(vpc, arith(0, VEC, DP4, cdst, mask, htmp, ceqn, none)); 1097 } 1098 1099 if (vpc->vp->nr_insns) 1100 vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; 1101 1102 if(debug_get_option_nvfx_dump_vp()) 1103 { 1104 debug_printf("\n"); 1105 tgsi_dump(vpc->pipe.tokens, 0); 1106 1107 debug_printf("\n%s vertex program:\n", nv30->is_nv4x ? "nv4x" : "nv3x"); 1108 for (i = 0; i < vp->nr_insns; i++) 1109 debug_printf("%3u: %08x %08x %08x %08x\n", i, vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]); 1110 debug_printf("\n"); 1111 } 1112 1113 vp->translated = TRUE; 1114 1115 out: 1116 tgsi_parse_free(&parse); 1117 if(vpc) { 1118 util_dynarray_fini(&vpc->label_relocs); 1119 util_dynarray_fini(&vpc->loop_stack); 1120 FREE(vpc->r_temp); 1121 FREE(vpc->r_address); 1122 FREE(vpc->r_const); 1123 FREE(vpc->imm); 1124 FREE(vpc); 1125 } 1126 1127 return vp->translated; 1128 } 1129