Home | History | Annotate | Download | only in swrast
      1 /*
      2  * Mesa 3-D graphics library
      3  * Version:  7.5
      4  *
      5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
      6  * Copyright (C) 2009  VMware, Inc.   All Rights Reserved.
      7  *
      8  * Permission is hereby granted, free of charge, to any person obtaining a
      9  * copy of this software and associated documentation files (the "Software"),
     10  * to deal in the Software without restriction, including without limitation
     11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     12  * and/or sell copies of the Software, and to permit persons to whom the
     13  * Software is furnished to do so, subject to the following conditions:
     14  *
     15  * The above copyright notice and this permission notice shall be included
     16  * in all copies or substantial portions of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     21  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
     22  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     23  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     24  */
     25 
     26 
     27 #include "main/glheader.h"
     28 #include "main/context.h"
     29 #include "main/colormac.h"
     30 #include "main/imports.h"
     31 #include "main/pixeltransfer.h"
     32 #include "main/samplerobj.h"
     33 #include "program/prog_instruction.h"
     34 
     35 #include "s_context.h"
     36 #include "s_texcombine.h"
     37 
     38 
/**
 * Pointer to array of float[4]
 * This type makes the code below more concise and avoids a lot of casting.
 *
 * A value of this type is indexed as a[i][c]: i selects one four-float
 * element and c one of its components.  In this file the elements are
 * RGBA colors/texels and c is one of RCOMP, GCOMP, BCOMP, ACOMP.
 */
typedef float (*float4_array)[4];
     44 
     45 
     46 /**
     47  * Return array of texels for given unit.
     48  */
     49 static inline float4_array
     50 get_texel_array(SWcontext *swrast, GLuint unit)
     51 {
     52 #ifdef _OPENMP
     53    return (float4_array) (swrast->TexelBuffer + unit * SWRAST_MAX_WIDTH * 4 * omp_get_num_threads() + (SWRAST_MAX_WIDTH * 4 * omp_get_thread_num()));
     54 #else
     55    return (float4_array) (swrast->TexelBuffer + unit * SWRAST_MAX_WIDTH * 4);
     56 #endif
     57 }
     58 
     59 
     60 
     61 /**
     62  * Do texture application for:
     63  *  GL_EXT_texture_env_combine
     64  *  GL_ARB_texture_env_combine
     65  *  GL_EXT_texture_env_dot3
     66  *  GL_ARB_texture_env_dot3
     67  *  GL_ATI_texture_env_combine3
     68  *  GL_NV_texture_env_combine4
     69  *  conventional GL texture env modes
     70  *
     71  * \param ctx          rendering context
     72  * \param unit         the texture combiner unit
     73  * \param primary_rgba incoming fragment color array
     74  * \param texelBuffer  pointer to texel colors for all texture units
     75  *
     76  * \param span         two fields are used in this function:
     77  *                       span->end: number of fragments to process
     78  *                       span->array->rgba: incoming/result fragment colors
     79  */
     80 static void
     81 texture_combine( struct gl_context *ctx, GLuint unit,
     82                  const float4_array primary_rgba,
     83                  const GLfloat *texelBuffer,
     84                  SWspan *span )
     85 {
     86    SWcontext *swrast = SWRAST_CONTEXT(ctx);
     87    const struct gl_texture_unit *textureUnit = &(ctx->Texture.Unit[unit]);
     88    const struct gl_tex_env_combine_state *combine = textureUnit->_CurrentCombine;
     89    float4_array argRGB[MAX_COMBINER_TERMS];
     90    float4_array argA[MAX_COMBINER_TERMS];
     91    const GLfloat scaleRGB = (GLfloat) (1 << combine->ScaleShiftRGB);
     92    const GLfloat scaleA = (GLfloat) (1 << combine->ScaleShiftA);
     93    const GLuint numArgsRGB = combine->_NumArgsRGB;
     94    const GLuint numArgsA = combine->_NumArgsA;
     95    float4_array ccolor[4], rgba;
     96    GLuint i, term;
     97    GLuint n = span->end;
     98    GLchan (*rgbaChan)[4] = span->array->rgba;
     99 
    100    /* alloc temp pixel buffers */
    101    rgba = (float4_array) malloc(4 * n * sizeof(GLfloat));
    102    if (!rgba) {
    103       _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
    104       return;
    105    }
    106 
    107    for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
    108       ccolor[i] = (float4_array) malloc(4 * n * sizeof(GLfloat));
    109       if (!ccolor[i]) {
    110          while (i) {
    111             free(ccolor[i]);
    112             i--;
    113          }
    114          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
    115          free(rgba);
    116          return;
    117       }
    118    }
    119 
    120    for (i = 0; i < n; i++) {
    121       rgba[i][RCOMP] = CHAN_TO_FLOAT(rgbaChan[i][RCOMP]);
    122       rgba[i][GCOMP] = CHAN_TO_FLOAT(rgbaChan[i][GCOMP]);
    123       rgba[i][BCOMP] = CHAN_TO_FLOAT(rgbaChan[i][BCOMP]);
    124       rgba[i][ACOMP] = CHAN_TO_FLOAT(rgbaChan[i][ACOMP]);
    125    }
    126 
    127    /*
    128    printf("modeRGB 0x%x  modeA 0x%x  srcRGB1 0x%x  srcA1 0x%x  srcRGB2 0x%x  srcA2 0x%x\n",
    129           combine->ModeRGB,
    130           combine->ModeA,
    131           combine->SourceRGB[0],
    132           combine->SourceA[0],
    133           combine->SourceRGB[1],
    134           combine->SourceA[1]);
    135    */
    136 
    137    /*
    138     * Do operand setup for up to 4 operands.  Loop over the terms.
    139     */
    140    for (term = 0; term < numArgsRGB; term++) {
    141       const GLenum srcRGB = combine->SourceRGB[term];
    142       const GLenum operandRGB = combine->OperandRGB[term];
    143 
    144       switch (srcRGB) {
    145          case GL_TEXTURE:
    146             argRGB[term] = get_texel_array(swrast, unit);
    147             break;
    148          case GL_PRIMARY_COLOR:
    149             argRGB[term] = primary_rgba;
    150             break;
    151          case GL_PREVIOUS:
    152             argRGB[term] = rgba;
    153             break;
    154          case GL_CONSTANT:
    155             {
    156                float4_array c = ccolor[term];
    157                GLfloat red   = textureUnit->EnvColor[0];
    158                GLfloat green = textureUnit->EnvColor[1];
    159                GLfloat blue  = textureUnit->EnvColor[2];
    160                GLfloat alpha = textureUnit->EnvColor[3];
    161                for (i = 0; i < n; i++) {
    162                   ASSIGN_4V(c[i], red, green, blue, alpha);
    163                }
    164                argRGB[term] = ccolor[term];
    165             }
    166             break;
    167 	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
    168 	  */
    169 	 case GL_ZERO:
    170             {
    171                float4_array c = ccolor[term];
    172                for (i = 0; i < n; i++) {
    173                   ASSIGN_4V(c[i], 0.0F, 0.0F, 0.0F, 0.0F);
    174                }
    175                argRGB[term] = ccolor[term];
    176             }
    177             break;
    178 	 case GL_ONE:
    179             {
    180                float4_array c = ccolor[term];
    181                for (i = 0; i < n; i++) {
    182                   ASSIGN_4V(c[i], 1.0F, 1.0F, 1.0F, 1.0F);
    183                }
    184                argRGB[term] = ccolor[term];
    185             }
    186             break;
    187          default:
    188             /* ARB_texture_env_crossbar source */
    189             {
    190                const GLuint srcUnit = srcRGB - GL_TEXTURE0;
    191                ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
    192                if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
    193                   goto end;
    194                argRGB[term] = get_texel_array(swrast, srcUnit);
    195             }
    196       }
    197 
    198       if (operandRGB != GL_SRC_COLOR) {
    199          float4_array src = argRGB[term];
    200          float4_array dst = ccolor[term];
    201 
    202          /* point to new arg[term] storage */
    203          argRGB[term] = ccolor[term];
    204 
    205          switch (operandRGB) {
    206          case GL_ONE_MINUS_SRC_COLOR:
    207             for (i = 0; i < n; i++) {
    208                dst[i][RCOMP] = 1.0F - src[i][RCOMP];
    209                dst[i][GCOMP] = 1.0F - src[i][GCOMP];
    210                dst[i][BCOMP] = 1.0F - src[i][BCOMP];
    211             }
    212             break;
    213          case GL_SRC_ALPHA:
    214             for (i = 0; i < n; i++) {
    215                dst[i][RCOMP] =
    216                dst[i][GCOMP] =
    217                dst[i][BCOMP] = src[i][ACOMP];
    218             }
    219             break;
    220          case GL_ONE_MINUS_SRC_ALPHA:
    221             for (i = 0; i < n; i++) {
    222                dst[i][RCOMP] =
    223                dst[i][GCOMP] =
    224                dst[i][BCOMP] = 1.0F - src[i][ACOMP];
    225             }
    226             break;
    227          default:
    228             _mesa_problem(ctx, "Bad operandRGB");
    229          }
    230       }
    231    }
    232 
    233    /*
    234     * Set up the argA[term] pointers
    235     */
    236    for (term = 0; term < numArgsA; term++) {
    237       const GLenum srcA = combine->SourceA[term];
    238       const GLenum operandA = combine->OperandA[term];
    239 
    240       switch (srcA) {
    241          case GL_TEXTURE:
    242             argA[term] = get_texel_array(swrast, unit);
    243             break;
    244          case GL_PRIMARY_COLOR:
    245             argA[term] = primary_rgba;
    246             break;
    247          case GL_PREVIOUS:
    248             argA[term] = rgba;
    249             break;
    250          case GL_CONSTANT:
    251             {
    252                float4_array c = ccolor[term];
    253                GLfloat alpha = textureUnit->EnvColor[3];
    254                for (i = 0; i < n; i++)
    255                   c[i][ACOMP] = alpha;
    256                argA[term] = ccolor[term];
    257             }
    258             break;
    259 	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
    260 	  */
    261 	 case GL_ZERO:
    262             {
    263                float4_array c = ccolor[term];
    264                for (i = 0; i < n; i++)
    265                   c[i][ACOMP] = 0.0F;
    266                argA[term] = ccolor[term];
    267             }
    268             break;
    269 	 case GL_ONE:
    270             {
    271                float4_array c = ccolor[term];
    272                for (i = 0; i < n; i++)
    273                   c[i][ACOMP] = 1.0F;
    274                argA[term] = ccolor[term];
    275             }
    276             break;
    277          default:
    278             /* ARB_texture_env_crossbar source */
    279             {
    280                const GLuint srcUnit = srcA - GL_TEXTURE0;
    281                ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
    282                if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
    283                   goto end;
    284                argA[term] = get_texel_array(swrast, srcUnit);
    285             }
    286       }
    287 
    288       if (operandA == GL_ONE_MINUS_SRC_ALPHA) {
    289          float4_array src = argA[term];
    290          float4_array dst = ccolor[term];
    291          argA[term] = ccolor[term];
    292          for (i = 0; i < n; i++) {
    293             dst[i][ACOMP] = 1.0F - src[i][ACOMP];
    294          }
    295       }
    296    }
    297 
    298    /* RGB channel combine */
    299    {
    300       float4_array arg0 = argRGB[0];
    301       float4_array arg1 = argRGB[1];
    302       float4_array arg2 = argRGB[2];
    303       float4_array arg3 = argRGB[3];
    304 
    305       switch (combine->ModeRGB) {
    306       case GL_REPLACE:
    307          for (i = 0; i < n; i++) {
    308             rgba[i][RCOMP] = arg0[i][RCOMP] * scaleRGB;
    309             rgba[i][GCOMP] = arg0[i][GCOMP] * scaleRGB;
    310             rgba[i][BCOMP] = arg0[i][BCOMP] * scaleRGB;
    311          }
    312          break;
    313       case GL_MODULATE:
    314          for (i = 0; i < n; i++) {
    315             rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * scaleRGB;
    316             rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * scaleRGB;
    317             rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * scaleRGB;
    318          }
    319          break;
    320       case GL_ADD:
    321          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
    322             /* (a * b) + (c * d) */
    323             for (i = 0; i < n; i++) {
    324                rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
    325                                  arg2[i][RCOMP] * arg3[i][RCOMP]) * scaleRGB;
    326                rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
    327                                  arg2[i][GCOMP] * arg3[i][GCOMP]) * scaleRGB;
    328                rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
    329                                  arg2[i][BCOMP] * arg3[i][BCOMP]) * scaleRGB;
    330             }
    331          }
    332          else {
    333             /* 2-term addition */
    334             for (i = 0; i < n; i++) {
    335                rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * scaleRGB;
    336                rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * scaleRGB;
    337                rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * scaleRGB;
    338             }
    339          }
    340          break;
    341       case GL_ADD_SIGNED:
    342          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
    343             /* (a * b) + (c * d) - 0.5 */
    344             for (i = 0; i < n; i++) {
    345                rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
    346                                  arg2[i][RCOMP] * arg3[i][RCOMP] - 0.5F) * scaleRGB;
    347                rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
    348                                  arg2[i][GCOMP] * arg3[i][GCOMP] - 0.5F) * scaleRGB;
    349                rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
    350                                  arg2[i][BCOMP] * arg3[i][BCOMP] - 0.5F) * scaleRGB;
    351             }
    352          }
    353          else {
    354             for (i = 0; i < n; i++) {
    355                rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5F) * scaleRGB;
    356                rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5F) * scaleRGB;
    357                rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5F) * scaleRGB;
    358             }
    359          }
    360          break;
    361       case GL_INTERPOLATE:
    362          for (i = 0; i < n; i++) {
    363             rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] +
    364                           arg1[i][RCOMP] * (1.0F - arg2[i][RCOMP])) * scaleRGB;
    365             rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] +
    366                           arg1[i][GCOMP] * (1.0F - arg2[i][GCOMP])) * scaleRGB;
    367             rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] +
    368                           arg1[i][BCOMP] * (1.0F - arg2[i][BCOMP])) * scaleRGB;
    369          }
    370          break;
    371       case GL_SUBTRACT:
    372          for (i = 0; i < n; i++) {
    373             rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * scaleRGB;
    374             rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * scaleRGB;
    375             rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * scaleRGB;
    376          }
    377          break;
    378       case GL_DOT3_RGB_EXT:
    379       case GL_DOT3_RGBA_EXT:
    380          /* Do not scale the result by 1 2 or 4 */
    381          for (i = 0; i < n; i++) {
    382             GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
    383                            (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
    384                            (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
    385                * 4.0F;
    386             dot = CLAMP(dot, 0.0F, 1.0F);
    387             rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
    388          }
    389          break;
    390       case GL_DOT3_RGB:
    391       case GL_DOT3_RGBA:
    392          /* DO scale the result by 1 2 or 4 */
    393          for (i = 0; i < n; i++) {
    394             GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
    395                            (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
    396                            (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
    397                * 4.0F * scaleRGB;
    398             dot = CLAMP(dot, 0.0F, 1.0F);
    399             rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
    400          }
    401          break;
    402       case GL_MODULATE_ADD_ATI:
    403          for (i = 0; i < n; i++) {
    404             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
    405                               arg1[i][RCOMP]) * scaleRGB;
    406             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
    407                               arg1[i][GCOMP]) * scaleRGB;
    408             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
    409                               arg1[i][BCOMP]) * scaleRGB;
    410 	 }
    411          break;
    412       case GL_MODULATE_SIGNED_ADD_ATI:
    413          for (i = 0; i < n; i++) {
    414             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
    415                               arg1[i][RCOMP] - 0.5F) * scaleRGB;
    416             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
    417                               arg1[i][GCOMP] - 0.5F) * scaleRGB;
    418             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
    419                               arg1[i][BCOMP] - 0.5F) * scaleRGB;
    420 	 }
    421          break;
    422       case GL_MODULATE_SUBTRACT_ATI:
    423          for (i = 0; i < n; i++) {
    424             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) -
    425                               arg1[i][RCOMP]) * scaleRGB;
    426             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) -
    427                               arg1[i][GCOMP]) * scaleRGB;
    428             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) -
    429                               arg1[i][BCOMP]) * scaleRGB;
    430 	 }
    431          break;
    432       case GL_BUMP_ENVMAP_ATI:
    433          /* this produces a fixed rgba color, and the coord calc is done elsewhere */
    434          for (i = 0; i < n; i++) {
    435             /* rgba result is 0,0,0,1 */
    436             rgba[i][RCOMP] = 0.0;
    437             rgba[i][GCOMP] = 0.0;
    438             rgba[i][BCOMP] = 0.0;
    439             rgba[i][ACOMP] = 1.0;
    440 	 }
    441          goto end; /* no alpha processing */
    442       default:
    443          _mesa_problem(ctx, "invalid combine mode");
    444       }
    445    }
    446 
    447    /* Alpha channel combine */
    448    {
    449       float4_array arg0 = argA[0];
    450       float4_array arg1 = argA[1];
    451       float4_array arg2 = argA[2];
    452       float4_array arg3 = argA[3];
    453 
    454       switch (combine->ModeA) {
    455       case GL_REPLACE:
    456          for (i = 0; i < n; i++) {
    457             rgba[i][ACOMP] = arg0[i][ACOMP] * scaleA;
    458          }
    459          break;
    460       case GL_MODULATE:
    461          for (i = 0; i < n; i++) {
    462             rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * scaleA;
    463          }
    464          break;
    465       case GL_ADD:
    466          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
    467             /* (a * b) + (c * d) */
    468             for (i = 0; i < n; i++) {
    469                rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
    470                                  arg2[i][ACOMP] * arg3[i][ACOMP]) * scaleA;
    471             }
    472          }
    473          else {
    474             /* two-term add */
    475             for (i = 0; i < n; i++) {
    476                rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * scaleA;
    477             }
    478          }
    479          break;
    480       case GL_ADD_SIGNED:
    481          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
    482             /* (a * b) + (c * d) - 0.5 */
    483             for (i = 0; i < n; i++) {
    484                rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
    485                                  arg2[i][ACOMP] * arg3[i][ACOMP] -
    486                                  0.5F) * scaleA;
    487             }
    488          }
    489          else {
    490             /* a + b - 0.5 */
    491             for (i = 0; i < n; i++) {
    492                rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * scaleA;
    493             }
    494          }
    495          break;
    496       case GL_INTERPOLATE:
    497          for (i = 0; i < n; i++) {
    498             rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] +
    499                               arg1[i][ACOMP] * (1.0F - arg2[i][ACOMP]))
    500                * scaleA;
    501          }
    502          break;
    503       case GL_SUBTRACT:
    504          for (i = 0; i < n; i++) {
    505             rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * scaleA;
    506          }
    507          break;
    508       case GL_MODULATE_ADD_ATI:
    509          for (i = 0; i < n; i++) {
    510             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
    511                               + arg1[i][ACOMP]) * scaleA;
    512          }
    513          break;
    514       case GL_MODULATE_SIGNED_ADD_ATI:
    515          for (i = 0; i < n; i++) {
    516             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) +
    517                               arg1[i][ACOMP] - 0.5F) * scaleA;
    518          }
    519          break;
    520       case GL_MODULATE_SUBTRACT_ATI:
    521          for (i = 0; i < n; i++) {
    522             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
    523                               - arg1[i][ACOMP]) * scaleA;
    524          }
    525          break;
    526       default:
    527          _mesa_problem(ctx, "invalid combine mode");
    528       }
    529    }
    530 
    531    /* Fix the alpha component for GL_DOT3_RGBA_EXT/ARB combining.
    532     * This is kind of a kludge.  It would have been better if the spec
    533     * were written such that the GL_COMBINE_ALPHA value could be set to
    534     * GL_DOT3.
    535     */
    536    if (combine->ModeRGB == GL_DOT3_RGBA_EXT ||
    537        combine->ModeRGB == GL_DOT3_RGBA) {
    538       for (i = 0; i < n; i++) {
    539 	 rgba[i][ACOMP] = rgba[i][RCOMP];
    540       }
    541    }
    542 
    543    for (i = 0; i < n; i++) {
    544       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][RCOMP], rgba[i][RCOMP]);
    545       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][GCOMP], rgba[i][GCOMP]);
    546       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][BCOMP], rgba[i][BCOMP]);
    547       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][ACOMP], rgba[i][ACOMP]);
    548    }
    549    /* The span->array->rgba values are of CHAN type so set
    550     * span->array->ChanType field accordingly.
    551     */
    552    span->array->ChanType = CHAN_TYPE;
    553 
    554 end:
    555    for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
    556       free(ccolor[i]);
    557    }
    558    free(rgba);
    559 }
    560 
    561 
    562 /**
    563  * Apply X/Y/Z/W/0/1 swizzle to an array of colors/texels.
    564  * See GL_EXT_texture_swizzle.
    565  */
    566 static void
    567 swizzle_texels(GLuint swizzle, GLuint count, float4_array texels)
    568 {
    569    const GLuint swzR = GET_SWZ(swizzle, 0);
    570    const GLuint swzG = GET_SWZ(swizzle, 1);
    571    const GLuint swzB = GET_SWZ(swizzle, 2);
    572    const GLuint swzA = GET_SWZ(swizzle, 3);
    573    GLfloat vector[6];
    574    GLuint i;
    575 
    576    vector[SWIZZLE_ZERO] = 0;
    577    vector[SWIZZLE_ONE] = 1.0F;
    578 
    579    for (i = 0; i < count; i++) {
    580       vector[SWIZZLE_X] = texels[i][0];
    581       vector[SWIZZLE_Y] = texels[i][1];
    582       vector[SWIZZLE_Z] = texels[i][2];
    583       vector[SWIZZLE_W] = texels[i][3];
    584       texels[i][RCOMP] = vector[swzR];
    585       texels[i][GCOMP] = vector[swzG];
    586       texels[i][BCOMP] = vector[swzB];
    587       texels[i][ACOMP] = vector[swzA];
    588    }
    589 }
    590 
    591 
    592 /**
    593  * Apply texture mapping to a span of fragments.
    594  */
    595 void
    596 _swrast_texture_span( struct gl_context *ctx, SWspan *span )
    597 {
    598    SWcontext *swrast = SWRAST_CONTEXT(ctx);
    599    float4_array primary_rgba;
    600    GLuint unit;
    601 
    602    if (!swrast->TexelBuffer) {
    603 #ifdef _OPENMP
    604       const GLint maxThreads = omp_get_max_threads();
    605 #else
    606       const GLint maxThreads = 1;
    607 #endif
    608 
    609       /* TexelBuffer is also global and normally shared by all SWspan
    610        * instances; when running with multiple threads, create one per
    611        * thread.
    612        */
    613       swrast->TexelBuffer =
    614 	 (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads *
    615 			    SWRAST_MAX_WIDTH * 4 * sizeof(GLfloat));
    616       if (!swrast->TexelBuffer) {
    617 	 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
    618 	 return;
    619       }
    620    }
    621 
    622    primary_rgba = (float4_array) malloc(span->end * 4 * sizeof(GLfloat));
    623 
    624    if (!primary_rgba) {
    625       _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_span");
    626       return;
    627    }
    628 
    629    ASSERT(span->end <= SWRAST_MAX_WIDTH);
    630 
    631    /*
    632     * Save copy of the incoming fragment colors (the GL_PRIMARY_COLOR)
    633     */
    634    if (swrast->_TextureCombinePrimary) {
    635       GLuint i;
    636       for (i = 0; i < span->end; i++) {
    637          primary_rgba[i][RCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
    638          primary_rgba[i][GCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
    639          primary_rgba[i][BCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
    640          primary_rgba[i][ACOMP] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
    641       }
    642    }
    643 
    644    /* First must sample all bump maps */
    645    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
    646       const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
    647 
    648       if (texUnit->_ReallyEnabled &&
    649          texUnit->_CurrentCombine->ModeRGB == GL_BUMP_ENVMAP_ATI) {
    650          const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
    651             span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
    652          float4_array targetcoords =
    653             span->array->attribs[FRAG_ATTRIB_TEX0 +
    654                ctx->Texture.Unit[unit].BumpTarget - GL_TEXTURE0];
    655 
    656          const struct gl_sampler_object *samp = _mesa_get_samplerobj(ctx, unit);
    657          GLfloat *lambda = span->array->lambda[unit];
    658          float4_array texels = get_texel_array(swrast, unit);
    659          GLuint i;
    660          GLfloat rotMatrix00 = ctx->Texture.Unit[unit].RotMatrix[0];
    661          GLfloat rotMatrix01 = ctx->Texture.Unit[unit].RotMatrix[1];
    662          GLfloat rotMatrix10 = ctx->Texture.Unit[unit].RotMatrix[2];
    663          GLfloat rotMatrix11 = ctx->Texture.Unit[unit].RotMatrix[3];
    664 
    665          /* adjust texture lod (lambda) */
    666          if (span->arrayMask & SPAN_LAMBDA) {
    667             if (texUnit->LodBias + samp->LodBias != 0.0F) {
    668                /* apply LOD bias, but don't clamp yet */
    669                const GLfloat bias = CLAMP(texUnit->LodBias + samp->LodBias,
    670                                           -ctx->Const.MaxTextureLodBias,
    671                                           ctx->Const.MaxTextureLodBias);
    672                GLuint i;
    673                for (i = 0; i < span->end; i++) {
    674                   lambda[i] += bias;
    675                }
    676             }
    677 
    678             if (samp->MinLod != -1000.0 ||
    679                 samp->MaxLod != 1000.0) {
    680                /* apply LOD clamping to lambda */
    681                const GLfloat min = samp->MinLod;
    682                const GLfloat max = samp->MaxLod;
    683                GLuint i;
    684                for (i = 0; i < span->end; i++) {
    685                   GLfloat l = lambda[i];
    686                   lambda[i] = CLAMP(l, min, max);
    687                }
    688             }
    689          }
    690 
    691          /* Sample the texture (span->end = number of fragments) */
    692          swrast->TextureSample[unit]( ctx, samp,
    693                                       ctx->Texture.Unit[unit]._Current,
    694                                       span->end, texcoords, lambda, texels );
    695 
    696          /* manipulate the span values of the bump target
    697             not sure this can work correctly even ignoring
    698             the problem that channel is unsigned */
    699          for (i = 0; i < span->end; i++) {
    700             targetcoords[i][0] += (texels[i][0] * rotMatrix00 + texels[i][1] *
    701                                   rotMatrix01) / targetcoords[i][3];
    702             targetcoords[i][1] += (texels[i][0] * rotMatrix10 + texels[i][1] *
    703                                   rotMatrix11) / targetcoords[i][3];
    704          }
    705       }
    706    }
    707 
    708    /*
    709     * Must do all texture sampling before combining in order to
    710     * accomodate GL_ARB_texture_env_crossbar.
    711     */
    712    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
    713       const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
    714       if (texUnit->_ReallyEnabled &&
    715           texUnit->_CurrentCombine->ModeRGB != GL_BUMP_ENVMAP_ATI) {
    716          const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
    717             span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
    718          const struct gl_texture_object *curObj = texUnit->_Current;
    719          const struct gl_sampler_object *samp = _mesa_get_samplerobj(ctx, unit);
    720          GLfloat *lambda = span->array->lambda[unit];
    721          float4_array texels = get_texel_array(swrast, unit);
    722 
    723          /* adjust texture lod (lambda) */
    724          if (span->arrayMask & SPAN_LAMBDA) {
    725             if (texUnit->LodBias + samp->LodBias != 0.0F) {
    726                /* apply LOD bias, but don't clamp yet */
    727                const GLfloat bias = CLAMP(texUnit->LodBias + samp->LodBias,
    728                                           -ctx->Const.MaxTextureLodBias,
    729                                           ctx->Const.MaxTextureLodBias);
    730                GLuint i;
    731                for (i = 0; i < span->end; i++) {
    732                   lambda[i] += bias;
    733                }
    734             }
    735 
    736             if (samp->MinLod != -1000.0 ||
    737                 samp->MaxLod != 1000.0) {
    738                /* apply LOD clamping to lambda */
    739                const GLfloat min = samp->MinLod;
    740                const GLfloat max = samp->MaxLod;
    741                GLuint i;
    742                for (i = 0; i < span->end; i++) {
    743                   GLfloat l = lambda[i];
    744                   lambda[i] = CLAMP(l, min, max);
    745                }
    746             }
    747          }
    748          else if (samp->MaxAnisotropy > 1.0 &&
    749                   samp->MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
    750             /* sample_lambda_2d_aniso is beeing used as texture_sample_func,
    751              * it requires the current SWspan *span as an additional parameter.
    752              * In order to keep the same function signature, the unused lambda
    753              * parameter will be modified to actually contain the SWspan pointer.
    754              * This is a Hack. To make it right, the texture_sample_func
    755              * signature and all implementing functions need to be modified.
    756              */
    757             /* "hide" SWspan struct; cast to (GLfloat *) to suppress warning */
    758             lambda = (GLfloat *)span;
    759          }
    760 
    761          /* Sample the texture (span->end = number of fragments) */
    762          swrast->TextureSample[unit]( ctx, samp,
    763                                       ctx->Texture.Unit[unit]._Current,
    764                                       span->end, texcoords, lambda, texels );
    765 
    766          /* GL_EXT_texture_swizzle */
    767          if (curObj->_Swizzle != SWIZZLE_NOOP) {
    768             swizzle_texels(curObj->_Swizzle, span->end, texels);
    769          }
    770       }
    771    }
    772 
    773    /*
    774     * OK, now apply the texture (aka texture combine/blend).
    775     * We modify the span->color.rgba values.
    776     */
    777    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
    778       if (ctx->Texture.Unit[unit]._ReallyEnabled)
    779          texture_combine(ctx, unit, primary_rgba, swrast->TexelBuffer, span);
    780    }
    781 
    782    free(primary_rgba);
    783 }
    784