1 /* 2 * Copyright (C) 2004 David Airlie All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 */ 21 22 #include "main/glheader.h" 23 #include "main/colormac.h" 24 #include "main/macros.h" 25 #include "main/atifragshader.h" 26 #include "main/samplerobj.h" 27 #include "swrast/s_atifragshader.h" 28 #include "swrast/s_context.h" 29 30 31 /** 32 * State for executing ATI fragment shader. 33 */ 34 struct atifs_machine 35 { 36 GLfloat Registers[6][4]; /** six temporary registers */ 37 GLfloat PrevPassRegisters[6][4]; 38 GLfloat Inputs[2][4]; /** Primary, secondary input colors */ 39 }; 40 41 42 43 /** 44 * Fetch a texel. 45 */ 46 static void 47 fetch_texel(struct gl_context * ctx, const GLfloat texcoord[4], GLfloat lambda, 48 GLuint unit, GLfloat color[4]) 49 { 50 SWcontext *swrast = SWRAST_CONTEXT(ctx); 51 52 /* XXX use a float-valued TextureSample routine here!!! */ 53 swrast->TextureSample[unit](ctx, _mesa_get_samplerobj(ctx, unit), 54 ctx->Texture.Unit[unit]._Current, 55 1, (const GLfloat(*)[4]) texcoord, 56 &lambda, (GLfloat (*)[4]) color); 57 } 58 59 static void 60 apply_swizzle(GLfloat values[4], GLuint swizzle) 61 { 62 GLfloat s, t, r, q; 63 64 s = values[0]; 65 t = values[1]; 66 r = values[2]; 67 q = values[3]; 68 69 switch (swizzle) { 70 case GL_SWIZZLE_STR_ATI: 71 values[0] = s; 72 values[1] = t; 73 values[2] = r; 74 break; 75 case GL_SWIZZLE_STQ_ATI: 76 values[0] = s; 77 values[1] = t; 78 values[2] = q; 79 break; 80 case GL_SWIZZLE_STR_DR_ATI: 81 values[0] = s / r; 82 values[1] = t / r; 83 values[2] = 1 / r; 84 break; 85 case GL_SWIZZLE_STQ_DQ_ATI: 86 /* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */ 87 if (q == 0.0F) 88 q = 0.000000001F; 89 values[0] = s / q; 90 values[1] = t / q; 91 values[2] = 1.0F / q; 92 break; 93 } 94 values[3] = 0.0; 95 } 96 97 static void 98 apply_src_rep(GLint optype, GLuint rep, GLfloat * val) 99 { 100 GLint i; 101 GLint start, end; 102 if (!rep) 103 return; 104 105 start = optype ? 3 : 0; 106 end = 4; 107 108 for (i = start; i < end; i++) { 109 switch (rep) { 110 case GL_RED: 111 val[i] = val[0]; 112 break; 113 case GL_GREEN: 114 val[i] = val[1]; 115 break; 116 case GL_BLUE: 117 val[i] = val[2]; 118 break; 119 case GL_ALPHA: 120 val[i] = val[3]; 121 break; 122 } 123 } 124 } 125 126 static void 127 apply_src_mod(GLint optype, GLuint mod, GLfloat * val) 128 { 129 GLint i; 130 GLint start, end; 131 132 if (!mod) 133 return; 134 135 start = optype ? 3 : 0; 136 end = 4; 137 138 for (i = start; i < end; i++) { 139 if (mod & GL_COMP_BIT_ATI) 140 val[i] = 1 - val[i]; 141 142 if (mod & GL_BIAS_BIT_ATI) 143 val[i] = val[i] - 0.5F; 144 145 if (mod & GL_2X_BIT_ATI) 146 val[i] = 2 * val[i]; 147 148 if (mod & GL_NEGATE_BIT_ATI) 149 val[i] = -val[i]; 150 } 151 } 152 153 static void 154 apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val) 155 { 156 GLint i; 157 GLint has_sat = mod & GL_SATURATE_BIT_ATI; 158 GLint start, end; 159 160 mod &= ~GL_SATURATE_BIT_ATI; 161 162 start = optype ? 3 : 0; 163 end = optype ? 4 : 3; 164 165 for (i = start; i < end; i++) { 166 switch (mod) { 167 case GL_2X_BIT_ATI: 168 val[i] = 2 * val[i]; 169 break; 170 case GL_4X_BIT_ATI: 171 val[i] = 4 * val[i]; 172 break; 173 case GL_8X_BIT_ATI: 174 val[i] = 8 * val[i]; 175 break; 176 case GL_HALF_BIT_ATI: 177 val[i] = val[i] * 0.5F; 178 break; 179 case GL_QUARTER_BIT_ATI: 180 val[i] = val[i] * 0.25F; 181 break; 182 case GL_EIGHTH_BIT_ATI: 183 val[i] = val[i] * 0.125F; 184 break; 185 } 186 187 if (has_sat) { 188 if (val[i] < 0.0F) 189 val[i] = 0.0F; 190 else if (val[i] > 1.0F) 191 val[i] = 1.0F; 192 } 193 else { 194 if (val[i] < -8.0F) 195 val[i] = -8.0F; 196 else if (val[i] > 8.0F) 197 val[i] = 8.0F; 198 } 199 } 200 } 201 202 203 static void 204 write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src, 205 GLfloat * dst) 206 { 207 GLint i; 208 apply_dst_mod(optype, mod, src); 209 210 if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) { 211 if (mask) { 212 if (mask & GL_RED_BIT_ATI) 213 dst[0] = src[0]; 214 215 if (mask & GL_GREEN_BIT_ATI) 216 dst[1] = src[1]; 217 218 if (mask & GL_BLUE_BIT_ATI) 219 dst[2] = src[2]; 220 } 221 else { 222 for (i = 0; i < 3; i++) 223 dst[i] = src[i]; 224 } 225 } 226 else 227 dst[3] = src[3]; 228 } 229 230 static void 231 finish_pass(struct atifs_machine *machine) 232 { 233 GLint i; 234 235 for (i = 0; i < 6; i++) { 236 COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]); 237 } 238 } 239 240 struct ati_fs_opcode_st ati_fs_opcodes[] = { 241 {GL_ADD_ATI, 2}, 242 {GL_SUB_ATI, 2}, 243 {GL_MUL_ATI, 2}, 244 {GL_MAD_ATI, 3}, 245 {GL_LERP_ATI, 3}, 246 {GL_MOV_ATI, 1}, 247 {GL_CND_ATI, 3}, 248 {GL_CND0_ATI, 3}, 249 {GL_DOT2_ADD_ATI, 3}, 250 {GL_DOT3_ATI, 2}, 251 {GL_DOT4_ATI, 2} 252 }; 253 254 255 256 static void 257 handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst, 258 const SWspan *span, GLuint column, GLuint idx) 259 { 260 GLuint swizzle = texinst->swizzle; 261 GLuint pass_tex = texinst->src; 262 263 if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) { 264 pass_tex -= GL_TEXTURE0_ARB; 265 COPY_4V(machine->Registers[idx], 266 span->array->attribs[FRAG_ATTRIB_TEX0 + pass_tex][column]); 267 } 268 else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) { 269 pass_tex -= GL_REG_0_ATI; 270 COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]); 271 } 272 apply_swizzle(machine->Registers[idx], swizzle); 273 274 } 275 276 static void 277 handle_sample_op(struct gl_context * ctx, struct atifs_machine *machine, 278 struct atifs_setupinst *texinst, const SWspan *span, 279 GLuint column, GLuint idx) 280 { 281 /* sample from unit idx using texinst->src as coords */ 282 GLuint swizzle = texinst->swizzle; 283 GLuint coord_source = texinst->src; 284 GLfloat tex_coords[4] = { 0 }; 285 286 if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) { 287 coord_source -= GL_TEXTURE0_ARB; 288 COPY_4V(tex_coords, 289 span->array->attribs[FRAG_ATTRIB_TEX0 + coord_source][column]); 290 } 291 else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) { 292 coord_source -= GL_REG_0_ATI; 293 COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]); 294 } 295 apply_swizzle(tex_coords, swizzle); 296 fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]); 297 } 298 299 #define SETUP_SRC_REG(optype, i, x) \ 300 do { \ 301 COPY_4V(src[optype][i], x); \ 302 } while (0) 303 304 305 306 /** 307 * Execute the given fragment shader. 308 * NOTE: we do everything in single-precision floating point 309 * \param ctx - rendering context 310 * \param shader - the shader to execute 311 * \param machine - virtual machine state 312 * \param span - the SWspan we're operating on 313 * \param column - which pixel [i] we're operating on in the span 314 */ 315 static void 316 execute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader, 317 struct atifs_machine *machine, const SWspan *span, 318 GLuint column) 319 { 320 GLuint pc; 321 struct atifs_instruction *inst; 322 struct atifs_setupinst *texinst; 323 GLint optype; 324 GLuint i; 325 GLint j, pass; 326 GLint dstreg; 327 GLfloat src[2][3][4]; 328 GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 }; 329 GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 }; 330 GLfloat dst[2][4], *dstp; 331 332 for (pass = 0; pass < shader->NumPasses; pass++) { 333 if (pass > 0) 334 finish_pass(machine); 335 for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) { 336 texinst = &shader->SetupInst[pass][j]; 337 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) 338 handle_pass_op(machine, texinst, span, column, j); 339 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) 340 handle_sample_op(ctx, machine, texinst, span, column, j); 341 } 342 343 for (pc = 0; pc < shader->numArithInstr[pass]; pc++) { 344 inst = &shader->Instructions[pass][pc]; 345 346 /* setup the source registers for color and alpha ops */ 347 for (optype = 0; optype < 2; optype++) { 348 for (i = 0; i < inst->ArgCount[optype]; i++) { 349 GLint index = inst->SrcReg[optype][i].Index; 350 351 if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI) 352 SETUP_SRC_REG(optype, i, 353 machine->Registers[index - GL_REG_0_ATI]); 354 else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) { 355 if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) { 356 SETUP_SRC_REG(optype, i, 357 shader->Constants[index - GL_CON_0_ATI]); 358 } else { 359 SETUP_SRC_REG(optype, i, 360 ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]); 361 } 362 } 363 else if (index == GL_ONE) 364 SETUP_SRC_REG(optype, i, ones); 365 else if (index == GL_ZERO) 366 SETUP_SRC_REG(optype, i, zeros); 367 else if (index == GL_PRIMARY_COLOR_EXT) 368 SETUP_SRC_REG(optype, i, 369 machine->Inputs[ATI_FS_INPUT_PRIMARY]); 370 else if (index == GL_SECONDARY_INTERPOLATOR_ATI) 371 SETUP_SRC_REG(optype, i, 372 machine->Inputs[ATI_FS_INPUT_SECONDARY]); 373 374 apply_src_rep(optype, inst->SrcReg[optype][i].argRep, 375 src[optype][i]); 376 apply_src_mod(optype, inst->SrcReg[optype][i].argMod, 377 src[optype][i]); 378 } 379 } 380 381 /* Execute the operations - color then alpha */ 382 for (optype = 0; optype < 2; optype++) { 383 if (inst->Opcode[optype]) { 384 switch (inst->Opcode[optype]) { 385 case GL_ADD_ATI: 386 if (!optype) 387 for (i = 0; i < 3; i++) { 388 dst[optype][i] = 389 src[optype][0][i] + src[optype][1][i]; 390 } 391 else 392 dst[optype][3] = src[optype][0][3] + src[optype][1][3]; 393 break; 394 case GL_SUB_ATI: 395 if (!optype) 396 for (i = 0; i < 3; i++) { 397 dst[optype][i] = 398 src[optype][0][i] - src[optype][1][i]; 399 } 400 else 401 dst[optype][3] = src[optype][0][3] - src[optype][1][3]; 402 break; 403 case GL_MUL_ATI: 404 if (!optype) 405 for (i = 0; i < 3; i++) { 406 dst[optype][i] = 407 src[optype][0][i] * src[optype][1][i]; 408 } 409 else 410 dst[optype][3] = src[optype][0][3] * src[optype][1][3]; 411 break; 412 case GL_MAD_ATI: 413 if (!optype) 414 for (i = 0; i < 3; i++) { 415 dst[optype][i] = 416 src[optype][0][i] * src[optype][1][i] + 417 src[optype][2][i]; 418 } 419 else 420 dst[optype][3] = 421 src[optype][0][3] * src[optype][1][3] + 422 src[optype][2][3]; 423 break; 424 case GL_LERP_ATI: 425 if (!optype) 426 for (i = 0; i < 3; i++) { 427 dst[optype][i] = 428 src[optype][0][i] * src[optype][1][i] + (1 - 429 src 430 [optype] 431 [0][i]) * 432 src[optype][2][i]; 433 } 434 else 435 dst[optype][3] = 436 src[optype][0][3] * src[optype][1][3] + (1 - 437 src[optype] 438 [0][3]) * 439 src[optype][2][3]; 440 break; 441 442 case GL_MOV_ATI: 443 if (!optype) 444 for (i = 0; i < 3; i++) { 445 dst[optype][i] = src[optype][0][i]; 446 } 447 else 448 dst[optype][3] = src[optype][0][3]; 449 break; 450 case GL_CND_ATI: 451 if (!optype) { 452 for (i = 0; i < 3; i++) { 453 dst[optype][i] = 454 (src[optype][2][i] > 455 0.5) ? src[optype][0][i] : src[optype][1][i]; 456 } 457 } 458 else { 459 dst[optype][3] = 460 (src[optype][2][3] > 461 0.5) ? src[optype][0][3] : src[optype][1][3]; 462 } 463 break; 464 465 case GL_CND0_ATI: 466 if (!optype) 467 for (i = 0; i < 3; i++) { 468 dst[optype][i] = 469 (src[optype][2][i] >= 470 0) ? src[optype][0][i] : src[optype][1][i]; 471 } 472 else { 473 dst[optype][3] = 474 (src[optype][2][3] >= 475 0) ? src[optype][0][3] : src[optype][1][3]; 476 } 477 break; 478 case GL_DOT2_ADD_ATI: 479 { 480 GLfloat result; 481 482 /* DOT 2 always uses the source from the color op */ 483 /* could save recalculation of dot products for alpha inst */ 484 result = src[0][0][0] * src[0][1][0] + 485 src[0][0][1] * src[0][1][1] + src[0][2][2]; 486 if (!optype) { 487 for (i = 0; i < 3; i++) { 488 dst[optype][i] = result; 489 } 490 } 491 else 492 dst[optype][3] = result; 493 } 494 break; 495 case GL_DOT3_ATI: 496 { 497 GLfloat result; 498 499 /* DOT 3 always uses the source from the color op */ 500 result = src[0][0][0] * src[0][1][0] + 501 src[0][0][1] * src[0][1][1] + 502 src[0][0][2] * src[0][1][2]; 503 504 if (!optype) { 505 for (i = 0; i < 3; i++) { 506 dst[optype][i] = result; 507 } 508 } 509 else 510 dst[optype][3] = result; 511 } 512 break; 513 case GL_DOT4_ATI: 514 { 515 GLfloat result; 516 517 /* DOT 4 always uses the source from the color op */ 518 result = src[0][0][0] * src[0][1][0] + 519 src[0][0][1] * src[0][1][1] + 520 src[0][0][2] * src[0][1][2] + 521 src[0][0][3] * src[0][1][3]; 522 if (!optype) { 523 for (i = 0; i < 3; i++) { 524 dst[optype][i] = result; 525 } 526 } 527 else 528 dst[optype][3] = result; 529 } 530 break; 531 532 } 533 } 534 } 535 536 /* write out the destination registers */ 537 for (optype = 0; optype < 2; optype++) { 538 if (inst->Opcode[optype]) { 539 dstreg = inst->DstReg[optype].Index; 540 dstp = machine->Registers[dstreg - GL_REG_0_ATI]; 541 542 if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) && 543 (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI))) 544 write_dst_addr(optype, inst->DstReg[optype].dstMod, 545 inst->DstReg[optype].dstMask, dst[optype], 546 dstp); 547 else 548 write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp); 549 } 550 } 551 } 552 } 553 } 554 555 556 /** 557 * Init fragment shader virtual machine state. 558 */ 559 static void 560 init_machine(struct gl_context * ctx, struct atifs_machine *machine, 561 const struct ati_fragment_shader *shader, 562 const SWspan *span, GLuint col) 563 { 564 GLfloat (*inputs)[4] = machine->Inputs; 565 GLint i, j; 566 567 for (i = 0; i < 6; i++) { 568 for (j = 0; j < 4; j++) 569 machine->Registers[i][j] = 0.0; 570 } 571 572 COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[FRAG_ATTRIB_COL0][col]); 573 COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[FRAG_ATTRIB_COL1][col]); 574 } 575 576 577 578 /** 579 * Execute the current ATI shader program, operating on the given span. 580 */ 581 void 582 _swrast_exec_fragment_shader(struct gl_context * ctx, SWspan *span) 583 { 584 const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current; 585 struct atifs_machine machine; 586 GLuint i; 587 588 /* incoming colors should be floats */ 589 ASSERT(span->array->ChanType == GL_FLOAT); 590 591 for (i = 0; i < span->end; i++) { 592 if (span->array->mask[i]) { 593 init_machine(ctx, &machine, shader, span, i); 594 595 execute_shader(ctx, shader, &machine, span, i); 596 597 /* store result color */ 598 { 599 const GLfloat *colOut = machine.Registers[0]; 600 /*fprintf(stderr,"outputs %f %f %f %f\n", 601 colOut[0], colOut[1], colOut[2], colOut[3]); */ 602 COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0][i], colOut); 603 } 604 } 605 } 606 } 607