Home | History | Annotate | Download | only in swrast
      1 /*
      2  * Mesa 3-D graphics library
      3  *
      4  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
      5  * Copyright (C) 2009  VMware, Inc.   All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the "Software"),
      9  * to deal in the Software without restriction, including without limitation
     10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     11  * and/or sell copies of the Software, and to permit persons to whom the
     12  * Software is furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included
     15  * in all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     23  * OTHER DEALINGS IN THE SOFTWARE.
     24  */
     25 
     26 
     27 #include "main/glheader.h"
     28 #include "main/context.h"
     29 #include "main/imports.h"
     30 #include "main/macros.h"
     31 #include "main/pixeltransfer.h"
     32 #include "main/samplerobj.h"
     33 #include "program/prog_instruction.h"
     34 
     35 #include "s_context.h"
     36 #include "s_texcombine.h"
     37 
     38 
     39 /**
     40  * Pointer to array of float[4]
     41  * This type makes the code below more concise and avoids a lot of casting.
     42  */
     43 typedef float (*float4_array)[4];
     44 
     45 
     46 /**
     47  * Return array of texels for given unit.
     48  */
     49 static inline float4_array
     50 get_texel_array(SWcontext *swrast, GLuint unit)
     51 {
     52 #ifdef _OPENMP
     53    return (float4_array) (swrast->TexelBuffer + unit * SWRAST_MAX_WIDTH * 4 * omp_get_num_threads() + (SWRAST_MAX_WIDTH * 4 * omp_get_thread_num()));
     54 #else
     55    return (float4_array) (swrast->TexelBuffer + unit * SWRAST_MAX_WIDTH * 4);
     56 #endif
     57 }
     58 
     59 
     60 
     61 /**
     62  * Do texture application for:
     63  *  GL_EXT_texture_env_combine
     64  *  GL_ARB_texture_env_combine
     65  *  GL_EXT_texture_env_dot3
     66  *  GL_ARB_texture_env_dot3
     67  *  GL_ATI_texture_env_combine3
     68  *  GL_NV_texture_env_combine4
     69  *  conventional GL texture env modes
     70  *
     71  * \param ctx          rendering context
     72  * \param unit         the texture combiner unit
     73  * \param primary_rgba incoming fragment color array
     74  * \param texelBuffer  pointer to texel colors for all texture units
     75  *
     76  * \param span         two fields are used in this function:
     77  *                       span->end: number of fragments to process
     78  *                       span->array->rgba: incoming/result fragment colors
     79  */
     80 static void
     81 texture_combine( struct gl_context *ctx, GLuint unit,
     82                  const float4_array primary_rgba,
     83                  const GLfloat *texelBuffer,
     84                  SWspan *span )
     85 {
     86    SWcontext *swrast = SWRAST_CONTEXT(ctx);
     87    const struct gl_texture_unit *textureUnit = &(ctx->Texture.Unit[unit]);
     88    const struct gl_tex_env_combine_state *combine = textureUnit->_CurrentCombine;
     89    float4_array argRGB[MAX_COMBINER_TERMS];
     90    float4_array argA[MAX_COMBINER_TERMS];
     91    const GLfloat scaleRGB = (GLfloat) (1 << combine->ScaleShiftRGB);
     92    const GLfloat scaleA = (GLfloat) (1 << combine->ScaleShiftA);
     93    const GLuint numArgsRGB = combine->_NumArgsRGB;
     94    const GLuint numArgsA = combine->_NumArgsA;
     95    float4_array ccolor[4], rgba;
     96    GLuint i, term;
     97    GLuint n = span->end;
     98    GLchan (*rgbaChan)[4] = span->array->rgba;
     99 
    100    /* alloc temp pixel buffers */
    101    rgba = malloc(4 * n * sizeof(GLfloat));
    102    if (!rgba) {
    103       _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
    104       return;
    105    }
    106 
    107    for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
    108       ccolor[i] = malloc(4 * n * sizeof(GLfloat));
    109       if (!ccolor[i]) {
    110          while (i) {
    111             free(ccolor[i]);
    112             i--;
    113          }
    114          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
    115          free(rgba);
    116          return;
    117       }
    118    }
    119 
    120    for (i = 0; i < n; i++) {
    121       rgba[i][RCOMP] = CHAN_TO_FLOAT(rgbaChan[i][RCOMP]);
    122       rgba[i][GCOMP] = CHAN_TO_FLOAT(rgbaChan[i][GCOMP]);
    123       rgba[i][BCOMP] = CHAN_TO_FLOAT(rgbaChan[i][BCOMP]);
    124       rgba[i][ACOMP] = CHAN_TO_FLOAT(rgbaChan[i][ACOMP]);
    125    }
    126 
    127    /*
    128    printf("modeRGB 0x%x  modeA 0x%x  srcRGB1 0x%x  srcA1 0x%x  srcRGB2 0x%x  srcA2 0x%x\n",
    129           combine->ModeRGB,
    130           combine->ModeA,
    131           combine->SourceRGB[0],
    132           combine->SourceA[0],
    133           combine->SourceRGB[1],
    134           combine->SourceA[1]);
    135    */
    136 
    137    /*
    138     * Do operand setup for up to 4 operands.  Loop over the terms.
    139     */
    140    for (term = 0; term < numArgsRGB; term++) {
    141       const GLenum srcRGB = combine->SourceRGB[term];
    142       const GLenum operandRGB = combine->OperandRGB[term];
    143 
    144       switch (srcRGB) {
    145          case GL_TEXTURE:
    146             argRGB[term] = get_texel_array(swrast, unit);
    147             break;
    148          case GL_PRIMARY_COLOR:
    149             argRGB[term] = primary_rgba;
    150             break;
    151          case GL_PREVIOUS:
    152             argRGB[term] = rgba;
    153             break;
    154          case GL_CONSTANT:
    155             {
    156                float4_array c = ccolor[term];
    157                GLfloat red   = textureUnit->EnvColor[0];
    158                GLfloat green = textureUnit->EnvColor[1];
    159                GLfloat blue  = textureUnit->EnvColor[2];
    160                GLfloat alpha = textureUnit->EnvColor[3];
    161                for (i = 0; i < n; i++) {
    162                   ASSIGN_4V(c[i], red, green, blue, alpha);
    163                }
    164                argRGB[term] = ccolor[term];
    165             }
    166             break;
    167 	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
    168 	  */
    169 	 case GL_ZERO:
    170             {
    171                float4_array c = ccolor[term];
    172                for (i = 0; i < n; i++) {
    173                   ASSIGN_4V(c[i], 0.0F, 0.0F, 0.0F, 0.0F);
    174                }
    175                argRGB[term] = ccolor[term];
    176             }
    177             break;
    178 	 case GL_ONE:
    179             {
    180                float4_array c = ccolor[term];
    181                for (i = 0; i < n; i++) {
    182                   ASSIGN_4V(c[i], 1.0F, 1.0F, 1.0F, 1.0F);
    183                }
    184                argRGB[term] = ccolor[term];
    185             }
    186             break;
    187          default:
    188             /* ARB_texture_env_crossbar source */
    189             {
    190                const GLuint srcUnit = srcRGB - GL_TEXTURE0;
    191                assert(srcUnit < ctx->Const.MaxTextureUnits);
    192                if (!ctx->Texture.Unit[srcUnit]._Current)
    193                   goto end;
    194                argRGB[term] = get_texel_array(swrast, srcUnit);
    195             }
    196       }
    197 
    198       if (operandRGB != GL_SRC_COLOR) {
    199          float4_array src = argRGB[term];
    200          float4_array dst = ccolor[term];
    201 
    202          /* point to new arg[term] storage */
    203          argRGB[term] = ccolor[term];
    204 
    205          switch (operandRGB) {
    206          case GL_ONE_MINUS_SRC_COLOR:
    207             for (i = 0; i < n; i++) {
    208                dst[i][RCOMP] = 1.0F - src[i][RCOMP];
    209                dst[i][GCOMP] = 1.0F - src[i][GCOMP];
    210                dst[i][BCOMP] = 1.0F - src[i][BCOMP];
    211             }
    212             break;
    213          case GL_SRC_ALPHA:
    214             for (i = 0; i < n; i++) {
    215                dst[i][RCOMP] =
    216                dst[i][GCOMP] =
    217                dst[i][BCOMP] = src[i][ACOMP];
    218             }
    219             break;
    220          case GL_ONE_MINUS_SRC_ALPHA:
    221             for (i = 0; i < n; i++) {
    222                dst[i][RCOMP] =
    223                dst[i][GCOMP] =
    224                dst[i][BCOMP] = 1.0F - src[i][ACOMP];
    225             }
    226             break;
    227          default:
    228             _mesa_problem(ctx, "Bad operandRGB");
    229          }
    230       }
    231    }
    232 
    233    /*
    234     * Set up the argA[term] pointers
    235     */
    236    for (term = 0; term < numArgsA; term++) {
    237       const GLenum srcA = combine->SourceA[term];
    238       const GLenum operandA = combine->OperandA[term];
    239 
    240       switch (srcA) {
    241          case GL_TEXTURE:
    242             argA[term] = get_texel_array(swrast, unit);
    243             break;
    244          case GL_PRIMARY_COLOR:
    245             argA[term] = primary_rgba;
    246             break;
    247          case GL_PREVIOUS:
    248             argA[term] = rgba;
    249             break;
    250          case GL_CONSTANT:
    251             {
    252                float4_array c = ccolor[term];
    253                GLfloat alpha = textureUnit->EnvColor[3];
    254                for (i = 0; i < n; i++)
    255                   c[i][ACOMP] = alpha;
    256                argA[term] = ccolor[term];
    257             }
    258             break;
    259 	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
    260 	  */
    261 	 case GL_ZERO:
    262             {
    263                float4_array c = ccolor[term];
    264                for (i = 0; i < n; i++)
    265                   c[i][ACOMP] = 0.0F;
    266                argA[term] = ccolor[term];
    267             }
    268             break;
    269 	 case GL_ONE:
    270             {
    271                float4_array c = ccolor[term];
    272                for (i = 0; i < n; i++)
    273                   c[i][ACOMP] = 1.0F;
    274                argA[term] = ccolor[term];
    275             }
    276             break;
    277          default:
    278             /* ARB_texture_env_crossbar source */
    279             {
    280                const GLuint srcUnit = srcA - GL_TEXTURE0;
    281                assert(srcUnit < ctx->Const.MaxTextureUnits);
    282                if (!ctx->Texture.Unit[srcUnit]._Current)
    283                   goto end;
    284                argA[term] = get_texel_array(swrast, srcUnit);
    285             }
    286       }
    287 
    288       if (operandA == GL_ONE_MINUS_SRC_ALPHA) {
    289          float4_array src = argA[term];
    290          float4_array dst = ccolor[term];
    291          argA[term] = ccolor[term];
    292          for (i = 0; i < n; i++) {
    293             dst[i][ACOMP] = 1.0F - src[i][ACOMP];
    294          }
    295       }
    296    }
    297 
    298    /* RGB channel combine */
    299    {
    300       float4_array arg0 = argRGB[0];
    301       float4_array arg1 = argRGB[1];
    302       float4_array arg2 = argRGB[2];
    303       float4_array arg3 = argRGB[3];
    304 
    305       switch (combine->ModeRGB) {
    306       case GL_REPLACE:
    307          for (i = 0; i < n; i++) {
    308             rgba[i][RCOMP] = arg0[i][RCOMP] * scaleRGB;
    309             rgba[i][GCOMP] = arg0[i][GCOMP] * scaleRGB;
    310             rgba[i][BCOMP] = arg0[i][BCOMP] * scaleRGB;
    311          }
    312          break;
    313       case GL_MODULATE:
    314          for (i = 0; i < n; i++) {
    315             rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * scaleRGB;
    316             rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * scaleRGB;
    317             rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * scaleRGB;
    318          }
    319          break;
    320       case GL_ADD:
    321          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
    322             /* (a * b) + (c * d) */
    323             for (i = 0; i < n; i++) {
    324                rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
    325                                  arg2[i][RCOMP] * arg3[i][RCOMP]) * scaleRGB;
    326                rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
    327                                  arg2[i][GCOMP] * arg3[i][GCOMP]) * scaleRGB;
    328                rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
    329                                  arg2[i][BCOMP] * arg3[i][BCOMP]) * scaleRGB;
    330             }
    331          }
    332          else {
    333             /* 2-term addition */
    334             for (i = 0; i < n; i++) {
    335                rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * scaleRGB;
    336                rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * scaleRGB;
    337                rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * scaleRGB;
    338             }
    339          }
    340          break;
    341       case GL_ADD_SIGNED:
    342          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
    343             /* (a * b) + (c * d) - 0.5 */
    344             for (i = 0; i < n; i++) {
    345                rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
    346                                  arg2[i][RCOMP] * arg3[i][RCOMP] - 0.5F) * scaleRGB;
    347                rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
    348                                  arg2[i][GCOMP] * arg3[i][GCOMP] - 0.5F) * scaleRGB;
    349                rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
    350                                  arg2[i][BCOMP] * arg3[i][BCOMP] - 0.5F) * scaleRGB;
    351             }
    352          }
    353          else {
    354             for (i = 0; i < n; i++) {
    355                rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5F) * scaleRGB;
    356                rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5F) * scaleRGB;
    357                rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5F) * scaleRGB;
    358             }
    359          }
    360          break;
    361       case GL_INTERPOLATE:
    362          for (i = 0; i < n; i++) {
    363             rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] +
    364                           arg1[i][RCOMP] * (1.0F - arg2[i][RCOMP])) * scaleRGB;
    365             rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] +
    366                           arg1[i][GCOMP] * (1.0F - arg2[i][GCOMP])) * scaleRGB;
    367             rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] +
    368                           arg1[i][BCOMP] * (1.0F - arg2[i][BCOMP])) * scaleRGB;
    369          }
    370          break;
    371       case GL_SUBTRACT:
    372          for (i = 0; i < n; i++) {
    373             rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * scaleRGB;
    374             rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * scaleRGB;
    375             rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * scaleRGB;
    376          }
    377          break;
    378       case GL_DOT3_RGB_EXT:
    379       case GL_DOT3_RGBA_EXT:
    380          /* Do not scale the result by 1 2 or 4 */
    381          for (i = 0; i < n; i++) {
    382             GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
    383                            (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
    384                            (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
    385                * 4.0F;
    386             dot = CLAMP(dot, 0.0F, 1.0F);
    387             rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
    388          }
    389          break;
    390       case GL_DOT3_RGB:
    391       case GL_DOT3_RGBA:
    392          /* DO scale the result by 1 2 or 4 */
    393          for (i = 0; i < n; i++) {
    394             GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
    395                            (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
    396                            (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
    397                * 4.0F * scaleRGB;
    398             dot = CLAMP(dot, 0.0F, 1.0F);
    399             rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
    400          }
    401          break;
    402       case GL_MODULATE_ADD_ATI:
    403          for (i = 0; i < n; i++) {
    404             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
    405                               arg1[i][RCOMP]) * scaleRGB;
    406             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
    407                               arg1[i][GCOMP]) * scaleRGB;
    408             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
    409                               arg1[i][BCOMP]) * scaleRGB;
    410 	 }
    411          break;
    412       case GL_MODULATE_SIGNED_ADD_ATI:
    413          for (i = 0; i < n; i++) {
    414             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
    415                               arg1[i][RCOMP] - 0.5F) * scaleRGB;
    416             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
    417                               arg1[i][GCOMP] - 0.5F) * scaleRGB;
    418             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
    419                               arg1[i][BCOMP] - 0.5F) * scaleRGB;
    420 	 }
    421          break;
    422       case GL_MODULATE_SUBTRACT_ATI:
    423          for (i = 0; i < n; i++) {
    424             rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) -
    425                               arg1[i][RCOMP]) * scaleRGB;
    426             rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) -
    427                               arg1[i][GCOMP]) * scaleRGB;
    428             rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) -
    429                               arg1[i][BCOMP]) * scaleRGB;
    430 	 }
    431          break;
    432       default:
    433          _mesa_problem(ctx, "invalid combine mode");
    434       }
    435    }
    436 
    437    /* Alpha channel combine */
    438    {
    439       float4_array arg0 = argA[0];
    440       float4_array arg1 = argA[1];
    441       float4_array arg2 = argA[2];
    442       float4_array arg3 = argA[3];
    443 
    444       switch (combine->ModeA) {
    445       case GL_REPLACE:
    446          for (i = 0; i < n; i++) {
    447             rgba[i][ACOMP] = arg0[i][ACOMP] * scaleA;
    448          }
    449          break;
    450       case GL_MODULATE:
    451          for (i = 0; i < n; i++) {
    452             rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * scaleA;
    453          }
    454          break;
    455       case GL_ADD:
    456          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
    457             /* (a * b) + (c * d) */
    458             for (i = 0; i < n; i++) {
    459                rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
    460                                  arg2[i][ACOMP] * arg3[i][ACOMP]) * scaleA;
    461             }
    462          }
    463          else {
    464             /* two-term add */
    465             for (i = 0; i < n; i++) {
    466                rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * scaleA;
    467             }
    468          }
    469          break;
    470       case GL_ADD_SIGNED:
    471          if (textureUnit->EnvMode == GL_COMBINE4_NV) {
    472             /* (a * b) + (c * d) - 0.5 */
    473             for (i = 0; i < n; i++) {
    474                rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
    475                                  arg2[i][ACOMP] * arg3[i][ACOMP] -
    476                                  0.5F) * scaleA;
    477             }
    478          }
    479          else {
    480             /* a + b - 0.5 */
    481             for (i = 0; i < n; i++) {
    482                rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * scaleA;
    483             }
    484          }
    485          break;
    486       case GL_INTERPOLATE:
    487          for (i = 0; i < n; i++) {
    488             rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] +
    489                               arg1[i][ACOMP] * (1.0F - arg2[i][ACOMP]))
    490                * scaleA;
    491          }
    492          break;
    493       case GL_SUBTRACT:
    494          for (i = 0; i < n; i++) {
    495             rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * scaleA;
    496          }
    497          break;
    498       case GL_MODULATE_ADD_ATI:
    499          for (i = 0; i < n; i++) {
    500             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
    501                               + arg1[i][ACOMP]) * scaleA;
    502          }
    503          break;
    504       case GL_MODULATE_SIGNED_ADD_ATI:
    505          for (i = 0; i < n; i++) {
    506             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) +
    507                               arg1[i][ACOMP] - 0.5F) * scaleA;
    508          }
    509          break;
    510       case GL_MODULATE_SUBTRACT_ATI:
    511          for (i = 0; i < n; i++) {
    512             rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
    513                               - arg1[i][ACOMP]) * scaleA;
    514          }
    515          break;
    516       default:
    517          _mesa_problem(ctx, "invalid combine mode");
    518       }
    519    }
    520 
    521    /* Fix the alpha component for GL_DOT3_RGBA_EXT/ARB combining.
    522     * This is kind of a kludge.  It would have been better if the spec
    523     * were written such that the GL_COMBINE_ALPHA value could be set to
    524     * GL_DOT3.
    525     */
    526    if (combine->ModeRGB == GL_DOT3_RGBA_EXT ||
    527        combine->ModeRGB == GL_DOT3_RGBA) {
    528       for (i = 0; i < n; i++) {
    529 	 rgba[i][ACOMP] = rgba[i][RCOMP];
    530       }
    531    }
    532 
    533    for (i = 0; i < n; i++) {
    534       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][RCOMP], rgba[i][RCOMP]);
    535       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][GCOMP], rgba[i][GCOMP]);
    536       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][BCOMP], rgba[i][BCOMP]);
    537       UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][ACOMP], rgba[i][ACOMP]);
    538    }
    539    /* The span->array->rgba values are of CHAN type so set
    540     * span->array->ChanType field accordingly.
    541     */
    542    span->array->ChanType = CHAN_TYPE;
    543 
    544 end:
    545    for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
    546       free(ccolor[i]);
    547    }
    548    free(rgba);
    549 }
    550 
    551 
    552 /**
    553  * Apply X/Y/Z/W/0/1 swizzle to an array of colors/texels.
    554  * See GL_EXT_texture_swizzle.
    555  */
    556 static void
    557 swizzle_texels(GLuint swizzle, GLuint count, float4_array texels)
    558 {
    559    const GLuint swzR = GET_SWZ(swizzle, 0);
    560    const GLuint swzG = GET_SWZ(swizzle, 1);
    561    const GLuint swzB = GET_SWZ(swizzle, 2);
    562    const GLuint swzA = GET_SWZ(swizzle, 3);
    563    GLfloat vector[6];
    564    GLuint i;
    565 
    566    vector[SWIZZLE_ZERO] = 0;
    567    vector[SWIZZLE_ONE] = 1.0F;
    568 
    569    for (i = 0; i < count; i++) {
    570       vector[SWIZZLE_X] = texels[i][0];
    571       vector[SWIZZLE_Y] = texels[i][1];
    572       vector[SWIZZLE_Z] = texels[i][2];
    573       vector[SWIZZLE_W] = texels[i][3];
    574       texels[i][RCOMP] = vector[swzR];
    575       texels[i][GCOMP] = vector[swzG];
    576       texels[i][BCOMP] = vector[swzB];
    577       texels[i][ACOMP] = vector[swzA];
    578    }
    579 }
    580 
    581 
    582 /**
    583  * Apply texture mapping to a span of fragments.
    584  */
    585 void
    586 _swrast_texture_span( struct gl_context *ctx, SWspan *span )
    587 {
    588    SWcontext *swrast = SWRAST_CONTEXT(ctx);
    589    float4_array primary_rgba;
    590    GLuint unit;
    591 
    592    if (!swrast->TexelBuffer) {
    593 #ifdef _OPENMP
    594       const GLint maxThreads = omp_get_max_threads();
    595 
    596       /* TexelBuffer memory allocation needs to be done in a critical section
    597        * as this code runs in a parallel loop.
    598        * When entering the section, first check if TexelBuffer has been
    599        * initialized already by another thread while this thread was waiting.
    600        */
    601       #pragma omp critical
    602       if (!swrast->TexelBuffer) {
    603 #else
    604       const GLint maxThreads = 1;
    605 #endif
    606 
    607       /* TexelBuffer is also global and normally shared by all SWspan
    608        * instances; when running with multiple threads, create one per
    609        * thread.
    610        */
    611       swrast->TexelBuffer =
    612 	 malloc(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits * maxThreads *
    613 			    SWRAST_MAX_WIDTH * 4 * sizeof(GLfloat));
    614 #ifdef _OPENMP
    615       } /* critical section */
    616 #endif
    617 
    618       if (!swrast->TexelBuffer) {
    619 	 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
    620 	 return;
    621       }
    622    }
    623 
    624    primary_rgba = malloc(span->end * 4 * sizeof(GLfloat));
    625 
    626    if (!primary_rgba) {
    627       _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_span");
    628       return;
    629    }
    630 
    631    assert(span->end <= SWRAST_MAX_WIDTH);
    632 
    633    /*
    634     * Save copy of the incoming fragment colors (the GL_PRIMARY_COLOR)
    635     */
    636    if (swrast->_TextureCombinePrimary) {
    637       GLuint i;
    638       for (i = 0; i < span->end; i++) {
    639          primary_rgba[i][RCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
    640          primary_rgba[i][GCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
    641          primary_rgba[i][BCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
    642          primary_rgba[i][ACOMP] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
    643       }
    644    }
    645 
    646    /*
    647     * Must do all texture sampling before combining in order to
    648     * accommodate GL_ARB_texture_env_crossbar.
    649     */
    650    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
    651       const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
    652       if (texUnit->_Current) {
    653          const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
    654             span->array->attribs[VARYING_SLOT_TEX0 + unit];
    655          const struct gl_texture_object *curObj = texUnit->_Current;
    656          const struct gl_sampler_object *samp = _mesa_get_samplerobj(ctx, unit);
    657          GLfloat *lambda = span->array->lambda[unit];
    658          float4_array texels = get_texel_array(swrast, unit);
    659 
    660          /* adjust texture lod (lambda) */
    661          if (span->arrayMask & SPAN_LAMBDA) {
    662             if (texUnit->LodBias + samp->LodBias != 0.0F) {
    663                /* apply LOD bias, but don't clamp yet */
    664                const GLfloat bias = CLAMP(texUnit->LodBias + samp->LodBias,
    665                                           -ctx->Const.MaxTextureLodBias,
    666                                           ctx->Const.MaxTextureLodBias);
    667                GLuint i;
    668                for (i = 0; i < span->end; i++) {
    669                   lambda[i] += bias;
    670                }
    671             }
    672 
    673             if (samp->MinLod != -1000.0F ||
    674                 samp->MaxLod != 1000.0F) {
    675                /* apply LOD clamping to lambda */
    676                const GLfloat min = samp->MinLod;
    677                const GLfloat max = samp->MaxLod;
    678                GLuint i;
    679                for (i = 0; i < span->end; i++) {
    680                   GLfloat l = lambda[i];
    681                   lambda[i] = CLAMP(l, min, max);
    682                }
    683             }
    684          }
    685          else if (samp->MaxAnisotropy > 1.0F &&
    686                   samp->MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
    687             /* sample_lambda_2d_aniso is beeing used as texture_sample_func,
    688              * it requires the current SWspan *span as an additional parameter.
    689              * In order to keep the same function signature, the unused lambda
    690              * parameter will be modified to actually contain the SWspan pointer.
    691              * This is a Hack. To make it right, the texture_sample_func
    692              * signature and all implementing functions need to be modified.
    693              */
    694             /* "hide" SWspan struct; cast to (GLfloat *) to suppress warning */
    695             lambda = (GLfloat *)span;
    696          }
    697 
    698          /* Sample the texture (span->end = number of fragments) */
    699          swrast->TextureSample[unit]( ctx, samp,
    700                                       ctx->Texture.Unit[unit]._Current,
    701                                       span->end, texcoords, lambda, texels );
    702 
    703          /* GL_EXT_texture_swizzle */
    704          if (curObj->_Swizzle != SWIZZLE_NOOP) {
    705             swizzle_texels(curObj->_Swizzle, span->end, texels);
    706          }
    707       }
    708    }
    709 
    710    /*
    711     * OK, now apply the texture (aka texture combine/blend).
    712     * We modify the span->color.rgba values.
    713     */
    714    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
    715       if (ctx->Texture.Unit[unit]._Current)
    716          texture_combine(ctx, unit, primary_rgba, swrast->TexelBuffer, span);
    717    }
    718 
    719    free(primary_rgba);
    720 }
    721