1 /* 2 * Copyright (C) 2005 Ben Skeggs. 3 * 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial 16 * portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 */ 27 28 /** 29 * \file 30 * 31 * Emit the r300_fragment_program_code that can be understood by the hardware. 32 * Input is a pre-transformed radeon_program. 33 * 34 * \author Ben Skeggs <darktama (at) iinet.net.au> 35 * 36 * \author Jerome Glisse <j.glisse (at) gmail.com> 37 */ 38 39 #include "r300_fragprog.h" 40 41 #include "../r300_reg.h" 42 43 #include "radeon_program_pair.h" 44 #include "r300_fragprog_swizzle.h" 45 46 47 struct r300_emit_state { 48 struct r300_fragment_program_compiler * compiler; 49 50 unsigned current_node : 2; 51 unsigned node_first_tex : 8; 52 unsigned node_first_alu : 8; 53 uint32_t node_flags; 54 }; 55 56 #define PROG_CODE \ 57 struct r300_fragment_program_compiler *c = emit->compiler; \ 58 struct r300_fragment_program_code *code = &c->code->code.r300 59 60 #define error(fmt, args...) do { \ 61 rc_error(&c->Base, "%s::%s(): " fmt "\n", \ 62 __FILE__, __FUNCTION__, ##args); \ 63 } while(0) 64 65 static unsigned int get_msbs_alu(unsigned int bits) 66 { 67 return (bits >> 6) & 0x7; 68 } 69 70 /** 71 * @param lsbs The number of least significant bits 72 */ 73 static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs) 74 { 75 return (bits >> lsbs) & 0x15; 76 } 77 78 #define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> lsbs) & mask) 79 80 /** 81 * Mark a temporary register as used. 82 */ 83 static void use_temporary(struct r300_fragment_program_code *code, unsigned int index) 84 { 85 if (index > code->pixsize) 86 code->pixsize = index; 87 } 88 89 static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src) 90 { 91 if (!src.Used) 92 return 0; 93 94 if (src.File == RC_FILE_CONSTANT) { 95 return src.Index | (1 << 5); 96 } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { 97 use_temporary(code, src.Index); 98 return src.Index & 0x1f; 99 } 100 101 return 0; 102 } 103 104 105 static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) 106 { 107 switch(opcode) { 108 case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP; 109 case RC_OPCODE_CND: return R300_ALU_OUTC_CND; 110 case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3; 111 case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4; 112 case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC; 113 default: 114 error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); 115 /* fall through */ 116 case RC_OPCODE_NOP: 117 /* fall through */ 118 case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD; 119 case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX; 120 case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN; 121 case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; 122 } 123 } 124 125 static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) 126 { 127 switch(opcode) { 128 case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP; 129 case RC_OPCODE_CND: return R300_ALU_OUTA_CND; 130 case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4; 131 case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4; 132 case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2; 133 case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC; 134 case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2; 135 default: 136 error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); 137 /* fall through */ 138 case RC_OPCODE_NOP: 139 /* fall through */ 140 case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD; 141 case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX; 142 case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN; 143 case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP; 144 case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ; 145 } 146 } 147 148 /** 149 * Emit one paired ALU instruction. 150 */ 151 static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst) 152 { 153 int ip; 154 int j; 155 PROG_CODE; 156 157 if (code->alu.length >= c->Base.max_alu_insts) { 158 error("Too many ALU instructions"); 159 return 0; 160 } 161 162 ip = code->alu.length++; 163 164 code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); 165 code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); 166 167 for(j = 0; j < 3; ++j) { 168 /* Set the RGB address */ 169 unsigned int src = use_source(code, inst->RGB.Src[j]); 170 unsigned int arg; 171 if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) 172 code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j); 173 174 code->alu.inst[ip].rgb_addr |= src << (6*j); 175 176 /* Set the Alpha address */ 177 src = use_source(code, inst->Alpha.Src[j]); 178 if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) 179 code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j); 180 181 code->alu.inst[ip].alpha_addr |= src << (6*j); 182 183 arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); 184 arg |= inst->RGB.Arg[j].Abs << 6; 185 arg |= inst->RGB.Arg[j].Negate << 5; 186 code->alu.inst[ip].rgb_inst |= arg << (7*j); 187 188 arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); 189 arg |= inst->Alpha.Arg[j].Abs << 6; 190 arg |= inst->Alpha.Arg[j].Negate << 5; 191 code->alu.inst[ip].alpha_inst |= arg << (7*j); 192 } 193 194 /* Presubtract */ 195 if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { 196 switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { 197 case RC_PRESUB_BIAS: 198 code->alu.inst[ip].rgb_inst |= 199 R300_ALU_SRCP_1_MINUS_2_SRC0; 200 break; 201 case RC_PRESUB_ADD: 202 code->alu.inst[ip].rgb_inst |= 203 R300_ALU_SRCP_SRC1_PLUS_SRC0; 204 break; 205 case RC_PRESUB_SUB: 206 code->alu.inst[ip].rgb_inst |= 207 R300_ALU_SRCP_SRC1_MINUS_SRC0; 208 break; 209 case RC_PRESUB_INV: 210 code->alu.inst[ip].rgb_inst |= 211 R300_ALU_SRCP_1_MINUS_SRC0; 212 break; 213 default: 214 break; 215 } 216 } 217 218 if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { 219 switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { 220 case RC_PRESUB_BIAS: 221 code->alu.inst[ip].alpha_inst |= 222 R300_ALU_SRCP_1_MINUS_2_SRC0; 223 break; 224 case RC_PRESUB_ADD: 225 code->alu.inst[ip].alpha_inst |= 226 R300_ALU_SRCP_SRC1_PLUS_SRC0; 227 break; 228 case RC_PRESUB_SUB: 229 code->alu.inst[ip].alpha_inst |= 230 R300_ALU_SRCP_SRC1_MINUS_SRC0; 231 break; 232 case RC_PRESUB_INV: 233 code->alu.inst[ip].alpha_inst |= 234 R300_ALU_SRCP_1_MINUS_SRC0; 235 break; 236 default: 237 break; 238 } 239 } 240 241 if (inst->RGB.Saturate) 242 code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; 243 if (inst->Alpha.Saturate) 244 code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP; 245 246 if (inst->RGB.WriteMask) { 247 use_temporary(code, inst->RGB.DestIndex); 248 if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS) 249 code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT; 250 code->alu.inst[ip].rgb_addr |= 251 ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) | 252 (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); 253 } 254 if (inst->RGB.OutputWriteMask) { 255 code->alu.inst[ip].rgb_addr |= 256 (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) | 257 R300_RGB_TARGET(inst->RGB.Target); 258 emit->node_flags |= R300_RGBA_OUT; 259 } 260 261 if (inst->Alpha.WriteMask) { 262 use_temporary(code, inst->Alpha.DestIndex); 263 if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS) 264 code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT; 265 code->alu.inst[ip].alpha_addr |= 266 ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | 267 R300_ALU_DSTA_REG; 268 } 269 if (inst->Alpha.OutputWriteMask) { 270 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT | 271 R300_ALPHA_TARGET(inst->Alpha.Target); 272 emit->node_flags |= R300_RGBA_OUT; 273 } 274 if (inst->Alpha.DepthWriteMask) { 275 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; 276 emit->node_flags |= R300_W_OUT; 277 c->code->writes_depth = 1; 278 } 279 if (inst->Nop) 280 code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP; 281 282 /* Handle Output Modifier 283 * According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */ 284 if (inst->RGB.Omod) { 285 if (inst->RGB.Omod == RC_OMOD_DISABLE) { 286 rc_error(&c->Base, "RC_OMOD_DISABLE not supported"); 287 } 288 code->alu.inst[ip].rgb_inst |= 289 (inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT); 290 } 291 if (inst->Alpha.Omod) { 292 if (inst->Alpha.Omod == RC_OMOD_DISABLE) { 293 rc_error(&c->Base, "RC_OMOD_DISABLE not supported"); 294 } 295 code->alu.inst[ip].alpha_inst |= 296 (inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT); 297 } 298 return 1; 299 } 300 301 302 /** 303 * Finish the current node without advancing to the next one. 304 */ 305 static int finish_node(struct r300_emit_state * emit) 306 { 307 struct r300_fragment_program_compiler * c = emit->compiler; 308 struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; 309 unsigned alu_offset; 310 unsigned alu_end; 311 unsigned tex_offset; 312 unsigned tex_end; 313 314 unsigned int alu_offset_msbs, alu_end_msbs; 315 316 if (code->alu.length == emit->node_first_alu) { 317 /* Generate a single NOP for this node */ 318 struct rc_pair_instruction inst; 319 memset(&inst, 0, sizeof(inst)); 320 if (!emit_alu(emit, &inst)) 321 return 0; 322 } 323 324 alu_offset = emit->node_first_alu; 325 alu_end = code->alu.length - alu_offset - 1; 326 tex_offset = emit->node_first_tex; 327 tex_end = code->tex.length - tex_offset - 1; 328 329 if (code->tex.length == emit->node_first_tex) { 330 if (emit->current_node > 0) { 331 error("Node %i has no TEX instructions", emit->current_node); 332 return 0; 333 } 334 335 tex_end = 0; 336 } else { 337 if (emit->current_node == 0) 338 code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX; 339 } 340 341 /* Write the config register. 342 * Note: The order in which the words for each node are written 343 * is not correct here and needs to be fixed up once we're entirely 344 * done 345 * 346 * Also note that the register specification from AMD is slightly 347 * incorrect in its description of this register. */ 348 code->code_addr[emit->current_node] = 349 ((alu_offset << R300_ALU_START_SHIFT) 350 & R300_ALU_START_MASK) 351 | ((alu_end << R300_ALU_SIZE_SHIFT) 352 & R300_ALU_SIZE_MASK) 353 | ((tex_offset << R300_TEX_START_SHIFT) 354 & R300_TEX_START_MASK) 355 | ((tex_end << R300_TEX_SIZE_SHIFT) 356 & R300_TEX_SIZE_MASK) 357 | emit->node_flags 358 | (get_msbs_tex(tex_offset, 5) 359 << R400_TEX_START_MSB_SHIFT) 360 | (get_msbs_tex(tex_end, 5) 361 << R400_TEX_SIZE_MSB_SHIFT) 362 ; 363 364 /* Write r400 extended instruction fields. These will be ignored on 365 * r300 cards. */ 366 alu_offset_msbs = get_msbs_alu(alu_offset); 367 alu_end_msbs = get_msbs_alu(alu_end); 368 switch(emit->current_node) { 369 case 0: 370 code->r400_code_offset_ext |= 371 alu_offset_msbs << R400_ALU_START3_MSB_SHIFT 372 | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT; 373 break; 374 case 1: 375 code->r400_code_offset_ext |= 376 alu_offset_msbs << R400_ALU_START2_MSB_SHIFT 377 | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT; 378 break; 379 case 2: 380 code->r400_code_offset_ext |= 381 alu_offset_msbs << R400_ALU_START1_MSB_SHIFT 382 | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT; 383 break; 384 case 3: 385 code->r400_code_offset_ext |= 386 alu_offset_msbs << R400_ALU_START0_MSB_SHIFT 387 | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT; 388 break; 389 } 390 return 1; 391 } 392 393 394 /** 395 * Begin a block of texture instructions. 396 * Create the necessary indirection. 397 */ 398 static int begin_tex(struct r300_emit_state * emit) 399 { 400 PROG_CODE; 401 402 if (code->alu.length == emit->node_first_alu && 403 code->tex.length == emit->node_first_tex) { 404 return 1; 405 } 406 407 if (emit->current_node == 3) { 408 error("Too many texture indirections"); 409 return 0; 410 } 411 412 if (!finish_node(emit)) 413 return 0; 414 415 emit->current_node++; 416 emit->node_first_tex = code->tex.length; 417 emit->node_first_alu = code->alu.length; 418 emit->node_flags = 0; 419 return 1; 420 } 421 422 423 static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) 424 { 425 unsigned int unit; 426 unsigned int dest; 427 unsigned int opcode; 428 PROG_CODE; 429 430 if (code->tex.length >= emit->compiler->Base.max_tex_insts) { 431 error("Too many TEX instructions"); 432 return 0; 433 } 434 435 unit = inst->U.I.TexSrcUnit; 436 dest = inst->U.I.DstReg.Index; 437 438 switch(inst->U.I.Opcode) { 439 case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; 440 case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; 441 case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; 442 case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; 443 default: 444 error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name); 445 return 0; 446 } 447 448 if (inst->U.I.Opcode == RC_OPCODE_KIL) { 449 unit = 0; 450 dest = 0; 451 } else { 452 use_temporary(code, dest); 453 } 454 455 use_temporary(code, inst->U.I.SrcReg[0].Index); 456 457 code->tex.inst[code->tex.length++] = 458 ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) 459 & R300_SRC_ADDR_MASK) 460 | ((dest << R300_DST_ADDR_SHIFT) 461 & R300_DST_ADDR_MASK) 462 | (unit << R300_TEX_ID_SHIFT) 463 | (opcode << R300_TEX_INST_SHIFT) 464 | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? 465 R400_SRC_ADDR_EXT_BIT : 0) 466 | (dest >= R300_PFS_NUM_TEMP_REGS ? 467 R400_DST_ADDR_EXT_BIT : 0) 468 ; 469 return 1; 470 } 471 472 473 /** 474 * Final compilation step: Turn the intermediate radeon_program into 475 * machine-readable instructions. 476 */ 477 void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) 478 { 479 struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; 480 struct r300_emit_state emit; 481 struct r300_fragment_program_code *code = &compiler->code->code.r300; 482 unsigned int tex_end; 483 484 memset(&emit, 0, sizeof(emit)); 485 emit.compiler = compiler; 486 487 memset(code, 0, sizeof(struct r300_fragment_program_code)); 488 489 for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; 490 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; 491 inst = inst->Next) { 492 if (inst->Type == RC_INSTRUCTION_NORMAL) { 493 if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { 494 begin_tex(&emit); 495 continue; 496 } 497 498 emit_tex(&emit, inst); 499 } else { 500 emit_alu(&emit, &inst->U.P); 501 } 502 } 503 504 if (code->pixsize >= compiler->Base.max_temp_regs) 505 rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); 506 507 if (compiler->Base.Error) 508 return; 509 510 /* Finish the program */ 511 finish_node(&emit); 512 513 code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ 514 515 /* Set r400 extended instruction fields. These values will be ignored 516 * on r300 cards. */ 517 code->r400_code_offset_ext |= 518 (get_msbs_alu(0) 519 << R400_ALU_OFFSET_MSB_SHIFT) 520 | (get_msbs_alu(code->alu.length - 1) 521 << R400_ALU_SIZE_MSB_SHIFT); 522 523 tex_end = code->tex.length ? code->tex.length - 1 : 0; 524 code->code_offset = 525 ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) 526 & R300_PFS_CNTL_ALU_OFFSET_MASK) 527 | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) 528 & R300_PFS_CNTL_ALU_END_MASK) 529 | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) 530 & R300_PFS_CNTL_TEX_OFFSET_MASK) 531 | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) 532 & R300_PFS_CNTL_TEX_END_MASK) 533 | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) 534 | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT) 535 ; 536 537 if (emit.current_node < 3) { 538 int shift = 3 - emit.current_node; 539 int i; 540 for(i = emit.current_node; i >= 0; --i) 541 code->code_addr[shift + i] = code->code_addr[i]; 542 for(i = 0; i < shift; ++i) 543 code->code_addr[i] = 0; 544 } 545 546 if (code->pixsize >= R300_PFS_NUM_TEMP_REGS 547 || code->alu.length > R300_PFS_MAX_ALU_INST 548 || code->tex.length > R300_PFS_MAX_TEX_INST) { 549 550 code->r390_mode = 1; 551 } 552 } 553