Home | History | Annotate | Download | only in swrast
      1 /*
      2  * Copyright (C) 2004  David Airlie   All Rights Reserved.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included
     12  * in all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
     18  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     19  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     20  */
     21 
     22 #include "main/glheader.h"
     23 #include "main/macros.h"
     24 #include "main/atifragshader.h"
     25 #include "main/samplerobj.h"
     26 #include "swrast/s_atifragshader.h"
     27 #include "swrast/s_context.h"
     28 
     29 #define ATI_FS_INPUT_PRIMARY 0
     30 #define ATI_FS_INPUT_SECONDARY 1
     31 
     32 /**
     33  * State for executing ATI fragment shader.
     34  */
     35 struct atifs_machine
     36 {
     37    GLfloat Registers[6][4];         /** six temporary registers */
     38    GLfloat PrevPassRegisters[6][4];
     39    GLfloat Inputs[2][4];   /** Primary, secondary input colors */
     40 };
     41 
     42 
     43 
     44 /**
     45  * Fetch a texel.
     46  */
     47 static void
     48 fetch_texel(struct gl_context * ctx, const GLfloat texcoord[4], GLfloat lambda,
     49 	    GLuint unit, GLfloat color[4])
     50 {
     51    SWcontext *swrast = SWRAST_CONTEXT(ctx);
     52 
     53    /* XXX use a float-valued TextureSample routine here!!! */
     54    swrast->TextureSample[unit](ctx, _mesa_get_samplerobj(ctx, unit),
     55                                ctx->Texture.Unit[unit]._Current,
     56 			       1, (const GLfloat(*)[4]) texcoord,
     57                                &lambda, (GLfloat (*)[4]) color);
     58 }
     59 
     60 static void
     61 apply_swizzle(GLfloat values[4], GLuint swizzle)
     62 {
     63    GLfloat s, t, r, q;
     64 
     65    s = values[0];
     66    t = values[1];
     67    r = values[2];
     68    q = values[3];
     69 
     70    switch (swizzle) {
     71    case GL_SWIZZLE_STR_ATI:
     72       values[0] = s;
     73       values[1] = t;
     74       values[2] = r;
     75       break;
     76    case GL_SWIZZLE_STQ_ATI:
     77       values[0] = s;
     78       values[1] = t;
     79       values[2] = q;
     80       break;
     81    case GL_SWIZZLE_STR_DR_ATI:
     82       values[0] = s / r;
     83       values[1] = t / r;
     84       values[2] = 1 / r;
     85       break;
     86    case GL_SWIZZLE_STQ_DQ_ATI:
     87 /* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
     88       if (q == 0.0F)
     89          q = 0.000000001F;
     90       values[0] = s / q;
     91       values[1] = t / q;
     92       values[2] = 1.0F / q;
     93       break;
     94    }
     95    values[3] = 0.0;
     96 }
     97 
     98 static void
     99 apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
    100 {
    101    GLint i;
    102    GLint start, end;
    103    if (!rep)
    104       return;
    105 
    106    start = optype ? 3 : 0;
    107    end = 4;
    108 
    109    for (i = start; i < end; i++) {
    110       switch (rep) {
    111       case GL_RED:
    112 	 val[i] = val[0];
    113 	 break;
    114       case GL_GREEN:
    115 	 val[i] = val[1];
    116 	 break;
    117       case GL_BLUE:
    118 	 val[i] = val[2];
    119 	 break;
    120       case GL_ALPHA:
    121 	 val[i] = val[3];
    122 	 break;
    123       }
    124    }
    125 }
    126 
    127 static void
    128 apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
    129 {
    130    GLint i;
    131    GLint start, end;
    132 
    133    if (!mod)
    134       return;
    135 
    136    start = optype ? 3 : 0;
    137    end = 4;
    138 
    139    for (i = start; i < end; i++) {
    140       if (mod & GL_COMP_BIT_ATI)
    141 	 val[i] = 1 - val[i];
    142 
    143       if (mod & GL_BIAS_BIT_ATI)
    144 	 val[i] = val[i] - 0.5F;
    145 
    146       if (mod & GL_2X_BIT_ATI)
    147 	 val[i] = 2 * val[i];
    148 
    149       if (mod & GL_NEGATE_BIT_ATI)
    150 	 val[i] = -val[i];
    151    }
    152 }
    153 
    154 static void
    155 apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
    156 {
    157    GLint i;
    158    GLint has_sat = mod & GL_SATURATE_BIT_ATI;
    159    GLint start, end;
    160 
    161    mod &= ~GL_SATURATE_BIT_ATI;
    162 
    163    start = optype ? 3 : 0;
    164    end = optype ? 4 : 3;
    165 
    166    for (i = start; i < end; i++) {
    167       switch (mod) {
    168       case GL_2X_BIT_ATI:
    169 	 val[i] = 2 * val[i];
    170 	 break;
    171       case GL_4X_BIT_ATI:
    172 	 val[i] = 4 * val[i];
    173 	 break;
    174       case GL_8X_BIT_ATI:
    175 	 val[i] = 8 * val[i];
    176 	 break;
    177       case GL_HALF_BIT_ATI:
    178 	 val[i] = val[i] * 0.5F;
    179 	 break;
    180       case GL_QUARTER_BIT_ATI:
    181 	 val[i] = val[i] * 0.25F;
    182 	 break;
    183       case GL_EIGHTH_BIT_ATI:
    184 	 val[i] = val[i] * 0.125F;
    185 	 break;
    186       }
    187 
    188       if (has_sat) {
    189 	 if (val[i] < 0.0F)
    190 	    val[i] = 0.0F;
    191 	 else if (val[i] > 1.0F)
    192 	    val[i] = 1.0F;
    193       }
    194       else {
    195 	 if (val[i] < -8.0F)
    196 	    val[i] = -8.0F;
    197 	 else if (val[i] > 8.0F)
    198 	    val[i] = 8.0F;
    199       }
    200    }
    201 }
    202 
    203 
    204 static void
    205 write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
    206 	       GLfloat * dst)
    207 {
    208    GLint i;
    209    apply_dst_mod(optype, mod, src);
    210 
    211    if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
    212       if (mask) {
    213 	 if (mask & GL_RED_BIT_ATI)
    214 	    dst[0] = src[0];
    215 
    216 	 if (mask & GL_GREEN_BIT_ATI)
    217 	    dst[1] = src[1];
    218 
    219 	 if (mask & GL_BLUE_BIT_ATI)
    220 	    dst[2] = src[2];
    221       }
    222       else {
    223 	 for (i = 0; i < 3; i++)
    224 	    dst[i] = src[i];
    225       }
    226    }
    227    else
    228       dst[3] = src[3];
    229 }
    230 
    231 static void
    232 finish_pass(struct atifs_machine *machine)
    233 {
    234    GLint i;
    235 
    236    for (i = 0; i < 6; i++) {
    237       COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
    238    }
    239 }
    240 
    241 
    242 static void
    243 handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
    244 	       const SWspan *span, GLuint column, GLuint idx)
    245 {
    246    GLuint swizzle = texinst->swizzle;
    247    GLuint pass_tex = texinst->src;
    248 
    249    if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
    250       pass_tex -= GL_TEXTURE0_ARB;
    251       COPY_4V(machine->Registers[idx],
    252 	      span->array->attribs[VARYING_SLOT_TEX0 + pass_tex][column]);
    253    }
    254    else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
    255       pass_tex -= GL_REG_0_ATI;
    256       COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
    257    }
    258    apply_swizzle(machine->Registers[idx], swizzle);
    259 
    260 }
    261 
    262 static void
    263 handle_sample_op(struct gl_context * ctx, struct atifs_machine *machine,
    264 		 struct atifs_setupinst *texinst, const SWspan *span,
    265 		 GLuint column, GLuint idx)
    266 {
    267 /* sample from unit idx using texinst->src as coords */
    268    GLuint swizzle = texinst->swizzle;
    269    GLuint coord_source = texinst->src;
    270    GLfloat tex_coords[4] = { 0 };
    271 
    272    if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
    273       coord_source -= GL_TEXTURE0_ARB;
    274       COPY_4V(tex_coords,
    275               span->array->attribs[VARYING_SLOT_TEX0 + coord_source][column]);
    276    }
    277    else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
    278       coord_source -= GL_REG_0_ATI;
    279       COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
    280    }
    281    apply_swizzle(tex_coords, swizzle);
    282    fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
    283 }
    284 
    285 #define SETUP_SRC_REG(optype, i, x)		\
    286 do {						\
    287    COPY_4V(src[optype][i], x); 			\
    288 } while (0)
    289 
    290 
    291 
    292 /**
    293  * Execute the given fragment shader.
    294  * NOTE: we do everything in single-precision floating point
    295  * \param ctx - rendering context
    296  * \param shader - the shader to execute
    297  * \param machine - virtual machine state
    298  * \param span - the SWspan we're operating on
    299  * \param column - which pixel [i] we're operating on in the span
    300  */
    301 static void
    302 execute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader,
    303 	       struct atifs_machine *machine, const SWspan *span,
    304                GLuint column)
    305 {
    306    GLuint pc;
    307    struct atifs_instruction *inst;
    308    struct atifs_setupinst *texinst;
    309    GLint optype;
    310    GLuint i;
    311    GLint j, pass;
    312    GLint dstreg;
    313    GLfloat src[2][3][4];
    314    GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
    315    GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
    316    GLfloat dst[2][4], *dstp;
    317 
    318    for (pass = 0; pass < shader->NumPasses; pass++) {
    319       if (pass > 0)
    320 	 finish_pass(machine);
    321       for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
    322 	 texinst = &shader->SetupInst[pass][j];
    323 	 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
    324 	    handle_pass_op(machine, texinst, span, column, j);
    325 	 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
    326 	    handle_sample_op(ctx, machine, texinst, span, column, j);
    327       }
    328 
    329       for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
    330 	 inst = &shader->Instructions[pass][pc];
    331 
    332 	 /* setup the source registers for color and alpha ops */
    333 	 for (optype = 0; optype < 2; optype++) {
    334  	    for (i = 0; i < inst->ArgCount[optype]; i++) {
    335 	       GLint index = inst->SrcReg[optype][i].Index;
    336 
    337 	       if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
    338 		  SETUP_SRC_REG(optype, i,
    339 				machine->Registers[index - GL_REG_0_ATI]);
    340 	       else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
    341 		  if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
    342 		     SETUP_SRC_REG(optype, i,
    343 				shader->Constants[index - GL_CON_0_ATI]);
    344 		  } else {
    345 		     SETUP_SRC_REG(optype, i,
    346 				ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
    347 		  }
    348 	       }
    349 	       else if (index == GL_ONE)
    350 		  SETUP_SRC_REG(optype, i, ones);
    351 	       else if (index == GL_ZERO)
    352 		  SETUP_SRC_REG(optype, i, zeros);
    353 	       else if (index == GL_PRIMARY_COLOR_EXT)
    354 		  SETUP_SRC_REG(optype, i,
    355 				machine->Inputs[ATI_FS_INPUT_PRIMARY]);
    356 	       else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
    357 		  SETUP_SRC_REG(optype, i,
    358 				machine->Inputs[ATI_FS_INPUT_SECONDARY]);
    359 
    360 	       apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
    361 			     src[optype][i]);
    362 	       apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
    363 			     src[optype][i]);
    364 	    }
    365 	 }
    366 
    367 	 /* Execute the operations - color then alpha */
    368 	 for (optype = 0; optype < 2; optype++) {
    369 	    if (inst->Opcode[optype]) {
    370 	       switch (inst->Opcode[optype]) {
    371 	       case GL_ADD_ATI:
    372 		  if (!optype)
    373 		     for (i = 0; i < 3; i++) {
    374 			dst[optype][i] =
    375 			   src[optype][0][i] + src[optype][1][i];
    376 		     }
    377 		  else
    378 		     dst[optype][3] = src[optype][0][3] + src[optype][1][3];
    379 		  break;
    380 	       case GL_SUB_ATI:
    381 		  if (!optype)
    382 		     for (i = 0; i < 3; i++) {
    383 			dst[optype][i] =
    384 			   src[optype][0][i] - src[optype][1][i];
    385 		     }
    386 		  else
    387 		     dst[optype][3] = src[optype][0][3] - src[optype][1][3];
    388 		  break;
    389 	       case GL_MUL_ATI:
    390 		  if (!optype)
    391 		     for (i = 0; i < 3; i++) {
    392 			dst[optype][i] =
    393 			   src[optype][0][i] * src[optype][1][i];
    394 		     }
    395 		  else
    396 		     dst[optype][3] = src[optype][0][3] * src[optype][1][3];
    397 		  break;
    398 	       case GL_MAD_ATI:
    399 		  if (!optype)
    400 		     for (i = 0; i < 3; i++) {
    401 			dst[optype][i] =
    402 			   src[optype][0][i] * src[optype][1][i] +
    403 			   src[optype][2][i];
    404 		     }
    405 		  else
    406 		     dst[optype][3] =
    407 			src[optype][0][3] * src[optype][1][3] +
    408 			src[optype][2][3];
    409 		  break;
    410 	       case GL_LERP_ATI:
    411 		  if (!optype)
    412 		     for (i = 0; i < 3; i++) {
    413 			dst[optype][i] =
    414 			   src[optype][0][i] * src[optype][1][i] + (1 -
    415 								    src
    416 								    [optype]
    417 								    [0][i]) *
    418 			   src[optype][2][i];
    419 		     }
    420 		  else
    421 		     dst[optype][3] =
    422 			src[optype][0][3] * src[optype][1][3] + (1 -
    423 								 src[optype]
    424 								 [0][3]) *
    425 			src[optype][2][3];
    426 		  break;
    427 
    428 	       case GL_MOV_ATI:
    429 		  if (!optype)
    430 		     for (i = 0; i < 3; i++) {
    431 			dst[optype][i] = src[optype][0][i];
    432 		     }
    433 		  else
    434 		     dst[optype][3] = src[optype][0][3];
    435 		  break;
    436 	       case GL_CND_ATI:
    437 		  if (!optype) {
    438 		     for (i = 0; i < 3; i++) {
    439 			dst[optype][i] =
    440 			   (src[optype][2][i] >
    441 			    0.5F) ? src[optype][0][i] : src[optype][1][i];
    442 		     }
    443 		  }
    444 		  else {
    445 		     dst[optype][3] =
    446 			(src[optype][2][3] >
    447 			 0.5F) ? src[optype][0][3] : src[optype][1][3];
    448 		  }
    449 		  break;
    450 
    451 	       case GL_CND0_ATI:
    452 		  if (!optype)
    453 		     for (i = 0; i < 3; i++) {
    454 			dst[optype][i] =
    455 			   (src[optype][2][i] >=
    456 			    0) ? src[optype][0][i] : src[optype][1][i];
    457 		     }
    458 		  else {
    459 		     dst[optype][3] =
    460 			(src[optype][2][3] >=
    461 			 0) ? src[optype][0][3] : src[optype][1][3];
    462 		  }
    463 		  break;
    464 	       case GL_DOT2_ADD_ATI:
    465 		  {
    466 		     GLfloat result;
    467 
    468 		     /* DOT 2 always uses the source from the color op */
    469 		     /* could save recalculation of dot products for alpha inst */
    470 		     result = src[0][0][0] * src[0][1][0] +
    471 			src[0][0][1] * src[0][1][1] + src[0][2][2];
    472 		     if (!optype) {
    473 			for (i = 0; i < 3; i++) {
    474 			   dst[optype][i] = result;
    475 			}
    476 		     }
    477 		     else
    478 			dst[optype][3] = result;
    479 		  }
    480 		  break;
    481 	       case GL_DOT3_ATI:
    482 		  {
    483 		     GLfloat result;
    484 
    485 		     /* DOT 3 always uses the source from the color op */
    486 		     result = src[0][0][0] * src[0][1][0] +
    487 			src[0][0][1] * src[0][1][1] +
    488 			src[0][0][2] * src[0][1][2];
    489 
    490 		     if (!optype) {
    491 			for (i = 0; i < 3; i++) {
    492 			   dst[optype][i] = result;
    493 			}
    494 		     }
    495 		     else
    496 			dst[optype][3] = result;
    497 		  }
    498 		  break;
    499 	       case GL_DOT4_ATI:
    500 		  {
    501 		     GLfloat result;
    502 
    503 		     /* DOT 4 always uses the source from the color op */
    504 		     result = src[0][0][0] * src[0][1][0] +
    505 			src[0][0][1] * src[0][1][1] +
    506 			src[0][0][2] * src[0][1][2] +
    507 			src[0][0][3] * src[0][1][3];
    508 		     if (!optype) {
    509 			for (i = 0; i < 3; i++) {
    510 			   dst[optype][i] = result;
    511 			}
    512 		     }
    513 		     else
    514 			dst[optype][3] = result;
    515 		  }
    516 		  break;
    517 
    518 	       }
    519 	    }
    520 	 }
    521 
    522 	 /* write out the destination registers */
    523 	 for (optype = 0; optype < 2; optype++) {
    524 	    if (inst->Opcode[optype]) {
    525 	       dstreg = inst->DstReg[optype].Index;
    526 	       dstp = machine->Registers[dstreg - GL_REG_0_ATI];
    527 
    528 	       if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
    529 		  (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
    530 	          write_dst_addr(optype, inst->DstReg[optype].dstMod,
    531 			      inst->DstReg[optype].dstMask, dst[optype],
    532 			      dstp);
    533 	       else
    534 		  write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
    535 	    }
    536 	 }
    537       }
    538    }
    539 }
    540 
    541 
    542 /**
    543  * Init fragment shader virtual machine state.
    544  */
    545 static void
    546 init_machine(struct gl_context * ctx, struct atifs_machine *machine,
    547 	     const struct ati_fragment_shader *shader,
    548 	     const SWspan *span, GLuint col)
    549 {
    550    GLfloat (*inputs)[4] = machine->Inputs;
    551    GLint i, j;
    552 
    553    for (i = 0; i < 6; i++) {
    554       for (j = 0; j < 4; j++)
    555 	 machine->Registers[i][j] = 0.0;
    556    }
    557 
    558    COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[VARYING_SLOT_COL0][col]);
    559    COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[VARYING_SLOT_COL1][col]);
    560 }
    561 
    562 
    563 
    564 /**
    565  * Execute the current ATI shader program, operating on the given span.
    566  */
    567 void
    568 _swrast_exec_fragment_shader(struct gl_context * ctx, SWspan *span)
    569 {
    570    const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
    571    struct atifs_machine machine;
    572    GLuint i;
    573 
    574    /* incoming colors should be floats */
    575    assert(span->array->ChanType == GL_FLOAT);
    576 
    577    for (i = 0; i < span->end; i++) {
    578       if (span->array->mask[i]) {
    579 	 init_machine(ctx, &machine, shader, span, i);
    580 
    581 	 execute_shader(ctx, shader, &machine, span, i);
    582 
    583          /* store result color */
    584 	 {
    585 	    const GLfloat *colOut = machine.Registers[0];
    586             /*fprintf(stderr,"outputs %f %f %f %f\n",
    587               colOut[0], colOut[1], colOut[2], colOut[3]); */
    588             COPY_4V(span->array->attribs[VARYING_SLOT_COL0][i], colOut);
    589 	 }
    590       }
    591    }
    592 }
    593