Home | History | Annotate | Download | only in swrast
      1 /*
      2  * Copyright (C) 2004  David Airlie   All Rights Reserved.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included
     12  * in all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
     18  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     19  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     20  */
     21 
     22 #include "main/glheader.h"
     23 #include "main/colormac.h"
     24 #include "main/macros.h"
     25 #include "main/atifragshader.h"
     26 #include "main/samplerobj.h"
     27 #include "swrast/s_atifragshader.h"
     28 #include "swrast/s_context.h"
     29 
     30 
/**
 * State for executing an ATI fragment shader on one fragment.
 */
struct atifs_machine
{
   GLfloat Registers[6][4];         /**< six temporary registers, four components each */
   GLfloat PrevPassRegisters[6][4]; /**< copy of Registers taken by finish_pass() between passes */
   GLfloat Inputs[2][4];   /**< primary and secondary input colors */
};
     40 
     41 
     42 
     43 /**
     44  * Fetch a texel.
     45  */
     46 static void
     47 fetch_texel(struct gl_context * ctx, const GLfloat texcoord[4], GLfloat lambda,
     48 	    GLuint unit, GLfloat color[4])
     49 {
     50    SWcontext *swrast = SWRAST_CONTEXT(ctx);
     51 
     52    /* XXX use a float-valued TextureSample routine here!!! */
     53    swrast->TextureSample[unit](ctx, _mesa_get_samplerobj(ctx, unit),
     54                                ctx->Texture.Unit[unit]._Current,
     55 			       1, (const GLfloat(*)[4]) texcoord,
     56                                &lambda, (GLfloat (*)[4]) color);
     57 }
     58 
     59 static void
     60 apply_swizzle(GLfloat values[4], GLuint swizzle)
     61 {
     62    GLfloat s, t, r, q;
     63 
     64    s = values[0];
     65    t = values[1];
     66    r = values[2];
     67    q = values[3];
     68 
     69    switch (swizzle) {
     70    case GL_SWIZZLE_STR_ATI:
     71       values[0] = s;
     72       values[1] = t;
     73       values[2] = r;
     74       break;
     75    case GL_SWIZZLE_STQ_ATI:
     76       values[0] = s;
     77       values[1] = t;
     78       values[2] = q;
     79       break;
     80    case GL_SWIZZLE_STR_DR_ATI:
     81       values[0] = s / r;
     82       values[1] = t / r;
     83       values[2] = 1 / r;
     84       break;
     85    case GL_SWIZZLE_STQ_DQ_ATI:
     86 /* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
     87       if (q == 0.0F)
     88          q = 0.000000001F;
     89       values[0] = s / q;
     90       values[1] = t / q;
     91       values[2] = 1.0F / q;
     92       break;
     93    }
     94    values[3] = 0.0;
     95 }
     96 
     97 static void
     98 apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
     99 {
    100    GLint i;
    101    GLint start, end;
    102    if (!rep)
    103       return;
    104 
    105    start = optype ? 3 : 0;
    106    end = 4;
    107 
    108    for (i = start; i < end; i++) {
    109       switch (rep) {
    110       case GL_RED:
    111 	 val[i] = val[0];
    112 	 break;
    113       case GL_GREEN:
    114 	 val[i] = val[1];
    115 	 break;
    116       case GL_BLUE:
    117 	 val[i] = val[2];
    118 	 break;
    119       case GL_ALPHA:
    120 	 val[i] = val[3];
    121 	 break;
    122       }
    123    }
    124 }
    125 
    126 static void
    127 apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
    128 {
    129    GLint i;
    130    GLint start, end;
    131 
    132    if (!mod)
    133       return;
    134 
    135    start = optype ? 3 : 0;
    136    end = 4;
    137 
    138    for (i = start; i < end; i++) {
    139       if (mod & GL_COMP_BIT_ATI)
    140 	 val[i] = 1 - val[i];
    141 
    142       if (mod & GL_BIAS_BIT_ATI)
    143 	 val[i] = val[i] - 0.5F;
    144 
    145       if (mod & GL_2X_BIT_ATI)
    146 	 val[i] = 2 * val[i];
    147 
    148       if (mod & GL_NEGATE_BIT_ATI)
    149 	 val[i] = -val[i];
    150    }
    151 }
    152 
    153 static void
    154 apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
    155 {
    156    GLint i;
    157    GLint has_sat = mod & GL_SATURATE_BIT_ATI;
    158    GLint start, end;
    159 
    160    mod &= ~GL_SATURATE_BIT_ATI;
    161 
    162    start = optype ? 3 : 0;
    163    end = optype ? 4 : 3;
    164 
    165    for (i = start; i < end; i++) {
    166       switch (mod) {
    167       case GL_2X_BIT_ATI:
    168 	 val[i] = 2 * val[i];
    169 	 break;
    170       case GL_4X_BIT_ATI:
    171 	 val[i] = 4 * val[i];
    172 	 break;
    173       case GL_8X_BIT_ATI:
    174 	 val[i] = 8 * val[i];
    175 	 break;
    176       case GL_HALF_BIT_ATI:
    177 	 val[i] = val[i] * 0.5F;
    178 	 break;
    179       case GL_QUARTER_BIT_ATI:
    180 	 val[i] = val[i] * 0.25F;
    181 	 break;
    182       case GL_EIGHTH_BIT_ATI:
    183 	 val[i] = val[i] * 0.125F;
    184 	 break;
    185       }
    186 
    187       if (has_sat) {
    188 	 if (val[i] < 0.0F)
    189 	    val[i] = 0.0F;
    190 	 else if (val[i] > 1.0F)
    191 	    val[i] = 1.0F;
    192       }
    193       else {
    194 	 if (val[i] < -8.0F)
    195 	    val[i] = -8.0F;
    196 	 else if (val[i] > 8.0F)
    197 	    val[i] = 8.0F;
    198       }
    199    }
    200 }
    201 
    202 
    203 static void
    204 write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
    205 	       GLfloat * dst)
    206 {
    207    GLint i;
    208    apply_dst_mod(optype, mod, src);
    209 
    210    if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
    211       if (mask) {
    212 	 if (mask & GL_RED_BIT_ATI)
    213 	    dst[0] = src[0];
    214 
    215 	 if (mask & GL_GREEN_BIT_ATI)
    216 	    dst[1] = src[1];
    217 
    218 	 if (mask & GL_BLUE_BIT_ATI)
    219 	    dst[2] = src[2];
    220       }
    221       else {
    222 	 for (i = 0; i < 3; i++)
    223 	    dst[i] = src[i];
    224       }
    225    }
    226    else
    227       dst[3] = src[3];
    228 }
    229 
    230 static void
    231 finish_pass(struct atifs_machine *machine)
    232 {
    233    GLint i;
    234 
    235    for (i = 0; i < 6; i++) {
    236       COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
    237    }
    238 }
    239 
/**
 * Table of the ATI fragment shader arithmetic opcodes, one entry for each
 * opcode that execute_shader() implements, each paired with an integer.
 *
 * NOTE(review): the integer looks like the number of source arguments the
 * opcode takes (1 for MOV, 3 for MAD/LERP/CND/CND0/DOT2_ADD, 2 otherwise);
 * confirm against the declaration of struct ati_fs_opcode_st.
 *
 * The table has external linkage and is not referenced by the code
 * visible in this file.
 */
struct ati_fs_opcode_st ati_fs_opcodes[] = {
   {GL_ADD_ATI, 2},
   {GL_SUB_ATI, 2},
   {GL_MUL_ATI, 2},
   {GL_MAD_ATI, 3},
   {GL_LERP_ATI, 3},
   {GL_MOV_ATI, 1},
   {GL_CND_ATI, 3},
   {GL_CND0_ATI, 3},
   {GL_DOT2_ADD_ATI, 3},
   {GL_DOT3_ATI, 2},
   {GL_DOT4_ATI, 2}
};
    253 
    254 
    255 
    256 static void
    257 handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
    258 	       const SWspan *span, GLuint column, GLuint idx)
    259 {
    260    GLuint swizzle = texinst->swizzle;
    261    GLuint pass_tex = texinst->src;
    262 
    263    if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
    264       pass_tex -= GL_TEXTURE0_ARB;
    265       COPY_4V(machine->Registers[idx],
    266 	      span->array->attribs[FRAG_ATTRIB_TEX0 + pass_tex][column]);
    267    }
    268    else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
    269       pass_tex -= GL_REG_0_ATI;
    270       COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
    271    }
    272    apply_swizzle(machine->Registers[idx], swizzle);
    273 
    274 }
    275 
    276 static void
    277 handle_sample_op(struct gl_context * ctx, struct atifs_machine *machine,
    278 		 struct atifs_setupinst *texinst, const SWspan *span,
    279 		 GLuint column, GLuint idx)
    280 {
    281 /* sample from unit idx using texinst->src as coords */
    282    GLuint swizzle = texinst->swizzle;
    283    GLuint coord_source = texinst->src;
    284    GLfloat tex_coords[4] = { 0 };
    285 
    286    if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
    287       coord_source -= GL_TEXTURE0_ARB;
    288       COPY_4V(tex_coords,
    289               span->array->attribs[FRAG_ATTRIB_TEX0 + coord_source][column]);
    290    }
    291    else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
    292       coord_source -= GL_REG_0_ATI;
    293       COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
    294    }
    295    apply_swizzle(tex_coords, swizzle);
    296    fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
    297 }
    298 
/**
 * Copy the 4-component vector \p x into src[optype][i].
 *
 * Not self-contained: it expands to a reference to an array named `src`,
 * which must be in scope wherever the macro is used.
 */
#define SETUP_SRC_REG(optype, i, x)		\
do {						\
   COPY_4V(src[optype][i], x); 			\
} while (0)
    303 
    304 
    305 
    306 /**
    307  * Execute the given fragment shader.
    308  * NOTE: we do everything in single-precision floating point
    309  * \param ctx - rendering context
    310  * \param shader - the shader to execute
    311  * \param machine - virtual machine state
    312  * \param span - the SWspan we're operating on
    313  * \param column - which pixel [i] we're operating on in the span
    314  */
    315 static void
    316 execute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader,
    317 	       struct atifs_machine *machine, const SWspan *span,
    318                GLuint column)
    319 {
    320    GLuint pc;
    321    struct atifs_instruction *inst;
    322    struct atifs_setupinst *texinst;
    323    GLint optype;
    324    GLuint i;
    325    GLint j, pass;
    326    GLint dstreg;
    327    GLfloat src[2][3][4];
    328    GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
    329    GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
    330    GLfloat dst[2][4], *dstp;
    331 
    332    for (pass = 0; pass < shader->NumPasses; pass++) {
    333       if (pass > 0)
    334 	 finish_pass(machine);
    335       for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
    336 	 texinst = &shader->SetupInst[pass][j];
    337 	 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
    338 	    handle_pass_op(machine, texinst, span, column, j);
    339 	 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
    340 	    handle_sample_op(ctx, machine, texinst, span, column, j);
    341       }
    342 
    343       for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
    344 	 inst = &shader->Instructions[pass][pc];
    345 
    346 	 /* setup the source registers for color and alpha ops */
    347 	 for (optype = 0; optype < 2; optype++) {
    348  	    for (i = 0; i < inst->ArgCount[optype]; i++) {
    349 	       GLint index = inst->SrcReg[optype][i].Index;
    350 
    351 	       if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
    352 		  SETUP_SRC_REG(optype, i,
    353 				machine->Registers[index - GL_REG_0_ATI]);
    354 	       else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
    355 		  if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
    356 		     SETUP_SRC_REG(optype, i,
    357 				shader->Constants[index - GL_CON_0_ATI]);
    358 		  } else {
    359 		     SETUP_SRC_REG(optype, i,
    360 				ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
    361 		  }
    362 	       }
    363 	       else if (index == GL_ONE)
    364 		  SETUP_SRC_REG(optype, i, ones);
    365 	       else if (index == GL_ZERO)
    366 		  SETUP_SRC_REG(optype, i, zeros);
    367 	       else if (index == GL_PRIMARY_COLOR_EXT)
    368 		  SETUP_SRC_REG(optype, i,
    369 				machine->Inputs[ATI_FS_INPUT_PRIMARY]);
    370 	       else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
    371 		  SETUP_SRC_REG(optype, i,
    372 				machine->Inputs[ATI_FS_INPUT_SECONDARY]);
    373 
    374 	       apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
    375 			     src[optype][i]);
    376 	       apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
    377 			     src[optype][i]);
    378 	    }
    379 	 }
    380 
    381 	 /* Execute the operations - color then alpha */
    382 	 for (optype = 0; optype < 2; optype++) {
    383 	    if (inst->Opcode[optype]) {
    384 	       switch (inst->Opcode[optype]) {
    385 	       case GL_ADD_ATI:
    386 		  if (!optype)
    387 		     for (i = 0; i < 3; i++) {
    388 			dst[optype][i] =
    389 			   src[optype][0][i] + src[optype][1][i];
    390 		     }
    391 		  else
    392 		     dst[optype][3] = src[optype][0][3] + src[optype][1][3];
    393 		  break;
    394 	       case GL_SUB_ATI:
    395 		  if (!optype)
    396 		     for (i = 0; i < 3; i++) {
    397 			dst[optype][i] =
    398 			   src[optype][0][i] - src[optype][1][i];
    399 		     }
    400 		  else
    401 		     dst[optype][3] = src[optype][0][3] - src[optype][1][3];
    402 		  break;
    403 	       case GL_MUL_ATI:
    404 		  if (!optype)
    405 		     for (i = 0; i < 3; i++) {
    406 			dst[optype][i] =
    407 			   src[optype][0][i] * src[optype][1][i];
    408 		     }
    409 		  else
    410 		     dst[optype][3] = src[optype][0][3] * src[optype][1][3];
    411 		  break;
    412 	       case GL_MAD_ATI:
    413 		  if (!optype)
    414 		     for (i = 0; i < 3; i++) {
    415 			dst[optype][i] =
    416 			   src[optype][0][i] * src[optype][1][i] +
    417 			   src[optype][2][i];
    418 		     }
    419 		  else
    420 		     dst[optype][3] =
    421 			src[optype][0][3] * src[optype][1][3] +
    422 			src[optype][2][3];
    423 		  break;
    424 	       case GL_LERP_ATI:
    425 		  if (!optype)
    426 		     for (i = 0; i < 3; i++) {
    427 			dst[optype][i] =
    428 			   src[optype][0][i] * src[optype][1][i] + (1 -
    429 								    src
    430 								    [optype]
    431 								    [0][i]) *
    432 			   src[optype][2][i];
    433 		     }
    434 		  else
    435 		     dst[optype][3] =
    436 			src[optype][0][3] * src[optype][1][3] + (1 -
    437 								 src[optype]
    438 								 [0][3]) *
    439 			src[optype][2][3];
    440 		  break;
    441 
    442 	       case GL_MOV_ATI:
    443 		  if (!optype)
    444 		     for (i = 0; i < 3; i++) {
    445 			dst[optype][i] = src[optype][0][i];
    446 		     }
    447 		  else
    448 		     dst[optype][3] = src[optype][0][3];
    449 		  break;
    450 	       case GL_CND_ATI:
    451 		  if (!optype) {
    452 		     for (i = 0; i < 3; i++) {
    453 			dst[optype][i] =
    454 			   (src[optype][2][i] >
    455 			    0.5) ? src[optype][0][i] : src[optype][1][i];
    456 		     }
    457 		  }
    458 		  else {
    459 		     dst[optype][3] =
    460 			(src[optype][2][3] >
    461 			 0.5) ? src[optype][0][3] : src[optype][1][3];
    462 		  }
    463 		  break;
    464 
    465 	       case GL_CND0_ATI:
    466 		  if (!optype)
    467 		     for (i = 0; i < 3; i++) {
    468 			dst[optype][i] =
    469 			   (src[optype][2][i] >=
    470 			    0) ? src[optype][0][i] : src[optype][1][i];
    471 		     }
    472 		  else {
    473 		     dst[optype][3] =
    474 			(src[optype][2][3] >=
    475 			 0) ? src[optype][0][3] : src[optype][1][3];
    476 		  }
    477 		  break;
    478 	       case GL_DOT2_ADD_ATI:
    479 		  {
    480 		     GLfloat result;
    481 
    482 		     /* DOT 2 always uses the source from the color op */
    483 		     /* could save recalculation of dot products for alpha inst */
    484 		     result = src[0][0][0] * src[0][1][0] +
    485 			src[0][0][1] * src[0][1][1] + src[0][2][2];
    486 		     if (!optype) {
    487 			for (i = 0; i < 3; i++) {
    488 			   dst[optype][i] = result;
    489 			}
    490 		     }
    491 		     else
    492 			dst[optype][3] = result;
    493 		  }
    494 		  break;
    495 	       case GL_DOT3_ATI:
    496 		  {
    497 		     GLfloat result;
    498 
    499 		     /* DOT 3 always uses the source from the color op */
    500 		     result = src[0][0][0] * src[0][1][0] +
    501 			src[0][0][1] * src[0][1][1] +
    502 			src[0][0][2] * src[0][1][2];
    503 
    504 		     if (!optype) {
    505 			for (i = 0; i < 3; i++) {
    506 			   dst[optype][i] = result;
    507 			}
    508 		     }
    509 		     else
    510 			dst[optype][3] = result;
    511 		  }
    512 		  break;
    513 	       case GL_DOT4_ATI:
    514 		  {
    515 		     GLfloat result;
    516 
    517 		     /* DOT 4 always uses the source from the color op */
    518 		     result = src[0][0][0] * src[0][1][0] +
    519 			src[0][0][1] * src[0][1][1] +
    520 			src[0][0][2] * src[0][1][2] +
    521 			src[0][0][3] * src[0][1][3];
    522 		     if (!optype) {
    523 			for (i = 0; i < 3; i++) {
    524 			   dst[optype][i] = result;
    525 			}
    526 		     }
    527 		     else
    528 			dst[optype][3] = result;
    529 		  }
    530 		  break;
    531 
    532 	       }
    533 	    }
    534 	 }
    535 
    536 	 /* write out the destination registers */
    537 	 for (optype = 0; optype < 2; optype++) {
    538 	    if (inst->Opcode[optype]) {
    539 	       dstreg = inst->DstReg[optype].Index;
    540 	       dstp = machine->Registers[dstreg - GL_REG_0_ATI];
    541 
    542 	       if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
    543 		  (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
    544 	          write_dst_addr(optype, inst->DstReg[optype].dstMod,
    545 			      inst->DstReg[optype].dstMask, dst[optype],
    546 			      dstp);
    547 	       else
    548 		  write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
    549 	    }
    550 	 }
    551       }
    552    }
    553 }
    554 
    555 
    556 /**
    557  * Init fragment shader virtual machine state.
    558  */
    559 static void
    560 init_machine(struct gl_context * ctx, struct atifs_machine *machine,
    561 	     const struct ati_fragment_shader *shader,
    562 	     const SWspan *span, GLuint col)
    563 {
    564    GLfloat (*inputs)[4] = machine->Inputs;
    565    GLint i, j;
    566 
    567    for (i = 0; i < 6; i++) {
    568       for (j = 0; j < 4; j++)
    569 	 machine->Registers[i][j] = 0.0;
    570    }
    571 
    572    COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[FRAG_ATTRIB_COL0][col]);
    573    COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[FRAG_ATTRIB_COL1][col]);
    574 }
    575 
    576 
    577 
    578 /**
    579  * Execute the current ATI shader program, operating on the given span.
    580  */
    581 void
    582 _swrast_exec_fragment_shader(struct gl_context * ctx, SWspan *span)
    583 {
    584    const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
    585    struct atifs_machine machine;
    586    GLuint i;
    587 
    588    /* incoming colors should be floats */
    589    ASSERT(span->array->ChanType == GL_FLOAT);
    590 
    591    for (i = 0; i < span->end; i++) {
    592       if (span->array->mask[i]) {
    593 	 init_machine(ctx, &machine, shader, span, i);
    594 
    595 	 execute_shader(ctx, shader, &machine, span, i);
    596 
    597          /* store result color */
    598 	 {
    599 	    const GLfloat *colOut = machine.Registers[0];
    600             /*fprintf(stderr,"outputs %f %f %f %f\n",
    601               colOut[0], colOut[1], colOut[2], colOut[3]); */
    602             COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0][i], colOut);
    603 	 }
    604       }
    605    }
    606 }
    607