Home | History | Annotate | Download | only in main
      1 /*
      2  * Mesa 3-D graphics library
      3  * Version:  7.1
      4  *
      5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the "Software"),
      9  * to deal in the Software without restriction, including without limitation
     10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     11  * and/or sell copies of the Software, and to permit persons to whom the
     12  * Software is furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included
     15  * in all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
     21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     23  */
     24 
     25 
     26 /**
     27  * \file texcompress_fxt1.c
     28  * GL_3DFX_texture_compression_FXT1 support.
     29  */
     30 
     31 
     32 #include "glheader.h"
     33 #include "imports.h"
     34 #include "colormac.h"
     35 #include "image.h"
     36 #include "macros.h"
     37 #include "mfeatures.h"
     38 #include "mipmap.h"
     39 #include "texcompress.h"
     40 #include "texcompress_fxt1.h"
     41 #include "texstore.h"
     42 #include "swrast/s_context.h"
     43 
     44 
     45 #if FEATURE_texture_fxt1
     46 
     47 
     48 static void
     49 fxt1_encode (GLuint width, GLuint height, GLint comps,
     50              const void *source, GLint srcRowStride,
     51              void *dest, GLint destRowStride);
     52 
     53 void
     54 fxt1_decode_1 (const void *texture, GLint stride,
     55                GLint i, GLint j, GLubyte *rgba);
     56 
     57 
     58 /**
     59  * Store user's image in rgb_fxt1 format.
     60  */
     61 GLboolean
     62 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
     63 {
     64    const GLubyte *pixels;
     65    GLint srcRowStride;
     66    GLubyte *dst;
     67    const GLubyte *tempImage = NULL;
     68 
     69    ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
     70 
     71    if (srcFormat != GL_RGB ||
     72        srcType != GL_UNSIGNED_BYTE ||
     73        ctx->_ImageTransferState ||
     74        srcPacking->RowLength != srcWidth ||
     75        srcPacking->SwapBytes) {
     76       /* convert image to RGB/GLubyte */
     77       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
     78                                              baseInternalFormat,
     79                                              _mesa_get_format_base_format(dstFormat),
     80                                              srcWidth, srcHeight, srcDepth,
     81                                              srcFormat, srcType, srcAddr,
     82                                              srcPacking);
     83       if (!tempImage)
     84          return GL_FALSE; /* out of memory */
     85       pixels = tempImage;
     86       srcRowStride = 3 * srcWidth;
     87       srcFormat = GL_RGB;
     88    }
     89    else {
     90       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
     91                                      srcFormat, srcType, 0, 0);
     92 
     93       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
     94                                             srcType) / sizeof(GLubyte);
     95    }
     96 
     97    dst = dstSlices[0];
     98 
     99    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
    100                dst, dstRowStride);
    101 
    102    if (tempImage)
    103       free((void*) tempImage);
    104 
    105    return GL_TRUE;
    106 }
    107 
    108 
    109 /**
    110  * Store user's image in rgba_fxt1 format.
    111  */
    112 GLboolean
    113 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
    114 {
    115    const GLubyte *pixels;
    116    GLint srcRowStride;
    117    GLubyte *dst;
    118    const GLubyte *tempImage = NULL;
    119 
    120    ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
    121 
    122    if (srcFormat != GL_RGBA ||
    123        srcType != GL_UNSIGNED_BYTE ||
    124        ctx->_ImageTransferState ||
    125        srcPacking->SwapBytes) {
    126       /* convert image to RGBA/GLubyte */
    127       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
    128                                              baseInternalFormat,
    129                                              _mesa_get_format_base_format(dstFormat),
    130                                              srcWidth, srcHeight, srcDepth,
    131                                              srcFormat, srcType, srcAddr,
    132                                              srcPacking);
    133       if (!tempImage)
    134          return GL_FALSE; /* out of memory */
    135       pixels = tempImage;
    136       srcRowStride = 4 * srcWidth;
    137       srcFormat = GL_RGBA;
    138    }
    139    else {
    140       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
    141                                      srcFormat, srcType, 0, 0);
    142 
    143       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
    144                                             srcType) / sizeof(GLubyte);
    145    }
    146 
    147    dst = dstSlices[0];
    148 
    149    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
    150                dst, dstRowStride);
    151 
    152    if (tempImage)
    153       free((void*) tempImage);
    154 
    155    return GL_TRUE;
    156 }
    157 
    158 
    159 void
    160 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct swrast_texture_image *texImage,
    161                                   GLint i, GLint j, GLint k, GLfloat *texel )
    162 {
    163    /* just sample as GLubyte and convert to float here */
    164    GLubyte rgba[4];
    165    (void) k;
    166    fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
    167    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
    168    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
    169    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
    170    texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
    171 }
    172 
    173 
    174 void
    175 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct swrast_texture_image *texImage,
    176                                  GLint i, GLint j, GLint k, GLfloat *texel )
    177 {
    178    /* just sample as GLubyte and convert to float here */
    179    GLubyte rgba[4];
    180    (void) k;
    181    fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
    182    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
    183    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
    184    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
    185    texel[ACOMP] = 1.0F;
    186 }
    187 
    188 
    189 
    190 /***************************************************************************\
    191  * FXT1 encoder
    192  *
    193  * The encoder was built by reversing the decoder,
    194  * and is vaguely based on Texus2 by 3dfx. Note that this code
    195  * is merely a proof of concept, since it is highly UNoptimized;
    196  * moreover, it is sub-optimal due to initial conditions passed
    197  * to Lloyd's algorithm (the interpolation modes are even worse).
    198 \***************************************************************************/
    199 
    200 
    201 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
    202 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
    203 #define N_TEXELS 32 /* number of texels in a block (always 32) */
    204 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
    205 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
    206 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
    207 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
    208 #define ISTBLACK(v) (*((GLuint *)(v)) == 0)
    209 
    210 
    211 /*
    212  * Define a 64-bit unsigned integer type and macros
    213  */
    214 #if 1
    215 
    216 #define FX64_NATIVE 1
    217 
    218 typedef uint64_t Fx64;
    219 
    220 #define FX64_MOV32(a, b) a = b
    221 #define FX64_OR32(a, b)  a |= b
    222 #define FX64_SHL(a, c)   a <<= c
    223 
    224 #else
    225 
    226 #define FX64_NATIVE 0
    227 
    228 typedef struct {
    229    GLuint lo, hi;
    230 } Fx64;
    231 
    232 #define FX64_MOV32(a, b) a.lo = b
    233 #define FX64_OR32(a, b)  a.lo |= b
    234 
    235 #define FX64_SHL(a, c)                                 \
    236    do {                                                \
    237        if ((c) >= 32) {                                \
    238           a.hi = a.lo << ((c) - 32);                   \
    239           a.lo = 0;                                    \
    240        } else {                                        \
    241           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
    242           a.lo <<= (c);                                \
    243        }                                               \
    244    } while (0)
    245 
    246 #endif
    247 
    248 
    249 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
    250 #define SAFECDOT 1 /* for paranoids */
    251 
    252 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
    253    do {                                  \
    254       /* compute interpolation vector */ \
    255       GLfloat d2 = 0.0F;                 \
    256       GLfloat rd2;                       \
    257                                          \
    258       for (i = 0; i < NC; i++) {         \
    259          IV[i] = (V1[i] - V0[i]) * F(i); \
    260          d2 += IV[i] * IV[i];            \
    261       }                                  \
    262       rd2 = (GLfloat)NV / d2;            \
    263       B = 0;                             \
    264       for (i = 0; i < NC; i++) {         \
    265          IV[i] *= F(i);                  \
    266          B -= IV[i] * V0[i];             \
    267          IV[i] *= rd2;                   \
    268       }                                  \
    269       B = B * rd2 + 0.5f;                \
    270    } while (0)
    271 
    272 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
    273    do {                                  \
    274       GLfloat dot = 0.0F;                \
    275       for (i = 0; i < NC; i++) {         \
    276          dot += V[i] * IV[i];            \
    277       }                                  \
    278       TEXEL = (GLint)(dot + B);          \
    279       if (SAFECDOT) {                    \
    280          if (TEXEL < 0) {                \
    281             TEXEL = 0;                   \
    282          } else if (TEXEL > NV) {        \
    283             TEXEL = NV;                  \
    284          }                               \
    285       }                                  \
    286    } while (0)
    287 
    288 
    289 static GLint
    290 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
    291               GLubyte input[MAX_COMP], GLint nc)
    292 {
    293    GLint i, j, best = -1;
    294    GLfloat err = 1e9; /* big enough */
    295 
    296    for (j = 0; j < nv; j++) {
    297       GLfloat e = 0.0F;
    298       for (i = 0; i < nc; i++) {
    299          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
    300       }
    301       if (e < err) {
    302          err = e;
    303          best = j;
    304       }
    305    }
    306 
    307    return best;
    308 }
    309 
    310 
    311 static GLint
    312 fxt1_worst (GLfloat vec[MAX_COMP],
    313             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
    314 {
    315    GLint i, k, worst = -1;
    316    GLfloat err = -1.0F; /* small enough */
    317 
    318    for (k = 0; k < n; k++) {
    319       GLfloat e = 0.0F;
    320       for (i = 0; i < nc; i++) {
    321          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
    322       }
    323       if (e > err) {
    324          err = e;
    325          worst = k;
    326       }
    327    }
    328 
    329    return worst;
    330 }
    331 
    332 
    333 static GLint
    334 fxt1_variance (GLdouble variance[MAX_COMP],
    335                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
    336 {
    337    GLint i, k, best = 0;
    338    GLint sx, sx2;
    339    GLdouble var, maxvar = -1; /* small enough */
    340    GLdouble teenth = 1.0 / n;
    341 
    342    for (i = 0; i < nc; i++) {
    343       sx = sx2 = 0;
    344       for (k = 0; k < n; k++) {
    345          GLint t = input[k][i];
    346          sx += t;
    347          sx2 += t * t;
    348       }
    349       var = sx2 * teenth - sx * sx * teenth * teenth;
    350       if (maxvar < var) {
    351          maxvar = var;
    352          best = i;
    353       }
    354       if (variance) {
    355          variance[i] = var;
    356       }
    357    }
    358 
    359    return best;
    360 }
    361 
    362 
    363 static GLint
    364 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
    365              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
    366 {
    367 #if 0
    368    /* Choose colors from a grid.
    369     */
    370    GLint i, j;
    371 
    372    for (j = 0; j < nv; j++) {
    373       GLint m = j * (n - 1) / (nv - 1);
    374       for (i = 0; i < nc; i++) {
    375          vec[j][i] = input[m][i];
    376       }
    377    }
    378 #else
    379    /* Our solution here is to find the darkest and brightest colors in
    380     * the 8x4 tile and use those as the two representative colors.
    381     * There are probably better algorithms to use (histogram-based).
    382     */
    383    GLint i, j, k;
    384    GLint minSum = 2000; /* big enough */
    385    GLint maxSum = -1; /* small enough */
    386    GLint minCol = 0; /* phoudoin: silent compiler! */
    387    GLint maxCol = 0; /* phoudoin: silent compiler! */
    388 
    389    struct {
    390       GLint flag;
    391       GLint key;
    392       GLint freq;
    393       GLint idx;
    394    } hist[N_TEXELS];
    395    GLint lenh = 0;
    396 
    397    memset(hist, 0, sizeof(hist));
    398 
    399    for (k = 0; k < n; k++) {
    400       GLint l;
    401       GLint key = 0;
    402       GLint sum = 0;
    403       for (i = 0; i < nc; i++) {
    404          key <<= 8;
    405          key |= input[k][i];
    406          sum += input[k][i];
    407       }
    408       for (l = 0; l < n; l++) {
    409          if (!hist[l].flag) {
    410             /* alloc new slot */
    411             hist[l].flag = !0;
    412             hist[l].key = key;
    413             hist[l].freq = 1;
    414             hist[l].idx = k;
    415             lenh = l + 1;
    416             break;
    417          } else if (hist[l].key == key) {
    418             hist[l].freq++;
    419             break;
    420          }
    421       }
    422       if (minSum > sum) {
    423          minSum = sum;
    424          minCol = k;
    425       }
    426       if (maxSum < sum) {
    427          maxSum = sum;
    428          maxCol = k;
    429       }
    430    }
    431 
    432    if (lenh <= nv) {
    433       for (j = 0; j < lenh; j++) {
    434          for (i = 0; i < nc; i++) {
    435             vec[j][i] = (GLfloat)input[hist[j].idx][i];
    436          }
    437       }
    438       for (; j < nv; j++) {
    439          for (i = 0; i < nc; i++) {
    440             vec[j][i] = vec[0][i];
    441          }
    442       }
    443       return 0;
    444    }
    445 
    446    for (j = 0; j < nv; j++) {
    447       for (i = 0; i < nc; i++) {
    448          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
    449       }
    450    }
    451 #endif
    452 
    453    return !0;
    454 }
    455 
    456 
    457 static GLint
    458 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
    459             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
    460 {
    461    /* Use the generalized lloyd's algorithm for VQ:
    462     *     find 4 color vectors.
    463     *
    464     *     for each sample color
    465     *         sort to nearest vector.
    466     *
    467     *     replace each vector with the centroid of its matching colors.
    468     *
    469     *     repeat until RMS doesn't improve.
    470     *
    471     *     if a color vector has no samples, or becomes the same as another
    472     *     vector, replace it with the color which is farthest from a sample.
    473     *
    474     * vec[][MAX_COMP]           initial vectors and resulting colors
    475     * nv                        number of resulting colors required
    476     * input[N_TEXELS][MAX_COMP] input texels
    477     * nc                        number of components in input / vec
    478     * n                         number of input samples
    479     */
    480 
    481    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
    482    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
    483    GLfloat error, lasterror = 1e9;
    484 
    485    GLint i, j, k, rep;
    486 
    487    /* the quantizer */
    488    for (rep = 0; rep < LL_N_REP; rep++) {
    489       /* reset sums & counters */
    490       for (j = 0; j < nv; j++) {
    491          for (i = 0; i < nc; i++) {
    492             sum[j][i] = 0;
    493          }
    494          cnt[j] = 0;
    495       }
    496       error = 0;
    497 
    498       /* scan whole block */
    499       for (k = 0; k < n; k++) {
    500 #if 1
    501          GLint best = -1;
    502          GLfloat err = 1e9; /* big enough */
    503          /* determine best vector */
    504          for (j = 0; j < nv; j++) {
    505             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
    506                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
    507                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
    508             if (nc == 4) {
    509                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
    510             }
    511             if (e < err) {
    512                err = e;
    513                best = j;
    514             }
    515          }
    516 #else
    517          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
    518 #endif
    519          assert(best >= 0);
    520          /* add in closest color */
    521          for (i = 0; i < nc; i++) {
    522             sum[best][i] += input[k][i];
    523          }
    524          /* mark this vector as used */
    525          cnt[best]++;
    526          /* accumulate error */
    527          error += err;
    528       }
    529 
    530       /* check RMS */
    531       if ((error < LL_RMS_E) ||
    532           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
    533          return !0; /* good match */
    534       }
    535       lasterror = error;
    536 
    537       /* move each vector to the barycenter of its closest colors */
    538       for (j = 0; j < nv; j++) {
    539          if (cnt[j]) {
    540             GLfloat div = 1.0F / cnt[j];
    541             for (i = 0; i < nc; i++) {
    542                vec[j][i] = div * sum[j][i];
    543             }
    544          } else {
    545             /* this vec has no samples or is identical with a previous vec */
    546             GLint worst = fxt1_worst(vec[j], input, nc, n);
    547             for (i = 0; i < nc; i++) {
    548                vec[j][i] = input[worst][i];
    549             }
    550          }
    551       }
    552    }
    553 
    554    return 0; /* could not converge fast enough */
    555 }
    556 
    557 
    558 static void
    559 fxt1_quantize_CHROMA (GLuint *cc,
    560                       GLubyte input[N_TEXELS][MAX_COMP])
    561 {
    562    const GLint n_vect = 4; /* 4 base vectors to find */
    563    const GLint n_comp = 3; /* 3 components: R, G, B */
    564    GLfloat vec[MAX_VECT][MAX_COMP];
    565    GLint i, j, k;
    566    Fx64 hi; /* high quadword */
    567    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
    568 
    569    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
    570       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
    571    }
    572 
    573    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
    574    for (j = n_vect - 1; j >= 0; j--) {
    575       for (i = 0; i < n_comp; i++) {
    576          /* add in colors */
    577          FX64_SHL(hi, 5);
    578          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
    579       }
    580    }
    581    ((Fx64 *)cc)[1] = hi;
    582 
    583    lohi = lolo = 0;
    584    /* right microtile */
    585    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
    586       lohi <<= 2;
    587       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
    588    }
    589    /* left microtile */
    590    for (; k >= 0; k--) {
    591       lolo <<= 2;
    592       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
    593    }
    594    cc[1] = lohi;
    595    cc[0] = lolo;
    596 }
    597 
    598 
    599 static void
    600 fxt1_quantize_ALPHA0 (GLuint *cc,
    601                       GLubyte input[N_TEXELS][MAX_COMP],
    602                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
    603 {
    604    const GLint n_vect = 3; /* 3 base vectors to find */
    605    const GLint n_comp = 4; /* 4 components: R, G, B, A */
    606    GLfloat vec[MAX_VECT][MAX_COMP];
    607    GLint i, j, k;
    608    Fx64 hi; /* high quadword */
    609    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
    610 
    611    /* the last vector indicates zero */
    612    for (i = 0; i < n_comp; i++) {
    613       vec[n_vect][i] = 0;
    614    }
    615 
    616    /* the first n texels in reord are guaranteed to be non-zero */
    617    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
    618       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
    619    }
    620 
    621    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
    622    for (j = n_vect - 1; j >= 0; j--) {
    623       /* add in alphas */
    624       FX64_SHL(hi, 5);
    625       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
    626    }
    627    for (j = n_vect - 1; j >= 0; j--) {
    628       for (i = 0; i < n_comp - 1; i++) {
    629          /* add in colors */
    630          FX64_SHL(hi, 5);
    631          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
    632       }
    633    }
    634    ((Fx64 *)cc)[1] = hi;
    635 
    636    lohi = lolo = 0;
    637    /* right microtile */
    638    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
    639       lohi <<= 2;
    640       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
    641    }
    642    /* left microtile */
    643    for (; k >= 0; k--) {
    644       lolo <<= 2;
    645       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
    646    }
    647    cc[1] = lohi;
    648    cc[0] = lolo;
    649 }
    650 
    651 
    652 static void
    653 fxt1_quantize_ALPHA1 (GLuint *cc,
    654                       GLubyte input[N_TEXELS][MAX_COMP])
    655 {
    656    const GLint n_vect = 3; /* highest vector number in each microtile */
    657    const GLint n_comp = 4; /* 4 components: R, G, B, A */
    658    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
    659    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
    660    GLint i, j, k;
    661    Fx64 hi; /* high quadword */
    662    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
    663 
    664    GLint minSum;
    665    GLint maxSum;
    666    GLint minColL = 0, maxColL = 0;
    667    GLint minColR = 0, maxColR = 0;
    668    GLint sumL = 0, sumR = 0;
    669    GLint nn_comp;
    670    /* Our solution here is to find the darkest and brightest colors in
    671     * the 4x4 tile and use those as the two representative colors.
    672     * There are probably better algorithms to use (histogram-based).
    673     */
    674    nn_comp = n_comp;
    675    while ((minColL == maxColL) && nn_comp) {
    676        minSum = 2000; /* big enough */
    677        maxSum = -1; /* small enough */
    678        for (k = 0; k < N_TEXELS / 2; k++) {
    679            GLint sum = 0;
    680            for (i = 0; i < nn_comp; i++) {
    681                sum += input[k][i];
    682            }
    683            if (minSum > sum) {
    684                minSum = sum;
    685                minColL = k;
    686            }
    687            if (maxSum < sum) {
    688                maxSum = sum;
    689                maxColL = k;
    690            }
    691            sumL += sum;
    692        }
    693 
    694        nn_comp--;
    695    }
    696 
    697    nn_comp = n_comp;
    698    while ((minColR == maxColR) && nn_comp) {
    699        minSum = 2000; /* big enough */
    700        maxSum = -1; /* small enough */
    701        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
    702            GLint sum = 0;
    703            for (i = 0; i < nn_comp; i++) {
    704                sum += input[k][i];
    705            }
    706            if (minSum > sum) {
    707                minSum = sum;
    708                minColR = k;
    709            }
    710            if (maxSum < sum) {
    711                maxSum = sum;
    712                maxColR = k;
    713            }
    714            sumR += sum;
    715        }
    716 
    717        nn_comp--;
    718    }
    719 
    720    /* choose the common vector (yuck!) */
    721    {
    722       GLint j1, j2;
    723       GLint v1 = 0, v2 = 0;
    724       GLfloat err = 1e9; /* big enough */
    725       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
    726       for (i = 0; i < n_comp; i++) {
    727          tv[0][i] = input[minColL][i];
    728          tv[1][i] = input[maxColL][i];
    729          tv[2][i] = input[minColR][i];
    730          tv[3][i] = input[maxColR][i];
    731       }
    732       for (j1 = 0; j1 < 2; j1++) {
    733          for (j2 = 2; j2 < 4; j2++) {
    734             GLfloat e = 0.0F;
    735             for (i = 0; i < n_comp; i++) {
    736                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
    737             }
    738             if (e < err) {
    739                err = e;
    740                v1 = j1;
    741                v2 = j2;
    742             }
    743          }
    744       }
    745       for (i = 0; i < n_comp; i++) {
    746          vec[0][i] = tv[1 - v1][i];
    747          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
    748          vec[2][i] = tv[5 - v2][i];
    749       }
    750    }
    751 
    752    /* left microtile */
    753    cc[0] = 0;
    754    if (minColL != maxColL) {
    755       /* compute interpolation vector */
    756       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
    757 
    758       /* add in texels */
    759       lolo = 0;
    760       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
    761          GLint texel;
    762          /* interpolate color */
    763          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
    764          /* add in texel */
    765          lolo <<= 2;
    766          lolo |= texel;
    767       }
    768 
    769       cc[0] = lolo;
    770    }
    771 
    772    /* right microtile */
    773    cc[1] = 0;
    774    if (minColR != maxColR) {
    775       /* compute interpolation vector */
    776       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
    777 
    778       /* add in texels */
    779       lohi = 0;
    780       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
    781          GLint texel;
    782          /* interpolate color */
    783          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
    784          /* add in texel */
    785          lohi <<= 2;
    786          lohi |= texel;
    787       }
    788 
    789       cc[1] = lohi;
    790    }
    791 
    792    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
    793    for (j = n_vect - 1; j >= 0; j--) {
    794       /* add in alphas */
    795       FX64_SHL(hi, 5);
    796       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
    797    }
    798    for (j = n_vect - 1; j >= 0; j--) {
    799       for (i = 0; i < n_comp - 1; i++) {
    800          /* add in colors */
    801          FX64_SHL(hi, 5);
    802          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
    803       }
    804    }
    805    ((Fx64 *)cc)[1] = hi;
    806 }
    807 
    808 
    809 static void
    810 fxt1_quantize_HI (GLuint *cc,
    811                   GLubyte input[N_TEXELS][MAX_COMP],
    812                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
    813 {
    814    const GLint n_vect = 6; /* highest vector number */
    815    const GLint n_comp = 3; /* 3 components: R, G, B */
    816    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
    817    GLfloat iv[MAX_COMP];   /* interpolation vector */
    818    GLint i, k;
    819    GLuint hihi; /* high quadword: hi dword */
    820 
    821    GLint minSum = 2000; /* big enough */
    822    GLint maxSum = -1; /* small enough */
    823    GLint minCol = 0; /* phoudoin: silent compiler! */
    824    GLint maxCol = 0; /* phoudoin: silent compiler! */
    825 
    826    /* Our solution here is to find the darkest and brightest colors in
    827     * the 8x4 tile and use those as the two representative colors.
    828     * There are probably better algorithms to use (histogram-based).
    829     */
    830    for (k = 0; k < n; k++) {
    831       GLint sum = 0;
    832       for (i = 0; i < n_comp; i++) {
    833          sum += reord[k][i];
    834       }
    835       if (minSum > sum) {
    836          minSum = sum;
    837          minCol = k;
    838       }
    839       if (maxSum < sum) {
    840          maxSum = sum;
    841          maxCol = k;
    842       }
    843    }
    844 
    845    hihi = 0; /* cc-hi = "00" */
    846    for (i = 0; i < n_comp; i++) {
    847       /* add in colors */
    848       hihi <<= 5;
    849       hihi |= reord[maxCol][i] >> 3;
    850    }
    851    for (i = 0; i < n_comp; i++) {
    852       /* add in colors */
    853       hihi <<= 5;
    854       hihi |= reord[minCol][i] >> 3;
    855    }
    856    cc[3] = hihi;
    857    cc[0] = cc[1] = cc[2] = 0;
    858 
    859    /* compute interpolation vector */
    860    if (minCol != maxCol) {
    861       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
    862    }
    863 
    864    /* add in texels */
    865    for (k = N_TEXELS - 1; k >= 0; k--) {
    866       GLint t = k * 3;
    867       GLuint *kk = (GLuint *)((char *)cc + t / 8);
    868       GLint texel = n_vect + 1; /* transparent black */
    869 
    870       if (!ISTBLACK(input[k])) {
    871          if (minCol != maxCol) {
    872             /* interpolate color */
    873             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
    874             /* add in texel */
    875             kk[0] |= texel << (t & 7);
    876          }
    877       } else {
    878          /* add in texel */
    879          kk[0] |= texel << (t & 7);
    880       }
    881    }
    882 }
    883 
    884 
    885 static void
    886 fxt1_quantize_MIXED1 (GLuint *cc,
    887                       GLubyte input[N_TEXELS][MAX_COMP])
    888 {
    889    const GLint n_vect = 2; /* highest vector number in each microtile */
    890    const GLint n_comp = 3; /* 3 components: R, G, B */
    891    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
    892    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
    893    GLint i, j, k;
    894    Fx64 hi; /* high quadword */
    895    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
    896 
    897    GLint minSum;
    898    GLint maxSum;
    899    GLint minColL = 0, maxColL = -1;
    900    GLint minColR = 0, maxColR = -1;
    901 
    902    /* Our solution here is to find the darkest and brightest colors in
    903     * the 4x4 tile and use those as the two representative colors.
    904     * There are probably better algorithms to use (histogram-based).
    905     */
    906    minSum = 2000; /* big enough */
    907    maxSum = -1; /* small enough */
    908    for (k = 0; k < N_TEXELS / 2; k++) {
    909       if (!ISTBLACK(input[k])) {
    910          GLint sum = 0;
    911          for (i = 0; i < n_comp; i++) {
    912             sum += input[k][i];
    913          }
    914          if (minSum > sum) {
    915             minSum = sum;
    916             minColL = k;
    917          }
    918          if (maxSum < sum) {
    919             maxSum = sum;
    920             maxColL = k;
    921          }
    922       }
    923    }
    924    minSum = 2000; /* big enough */
    925    maxSum = -1; /* small enough */
    926    for (; k < N_TEXELS; k++) {
    927       if (!ISTBLACK(input[k])) {
    928          GLint sum = 0;
    929          for (i = 0; i < n_comp; i++) {
    930             sum += input[k][i];
    931          }
    932          if (minSum > sum) {
    933             minSum = sum;
    934             minColR = k;
    935          }
    936          if (maxSum < sum) {
    937             maxSum = sum;
    938             maxColR = k;
    939          }
    940       }
    941    }
    942 
    943    /* left microtile */
    944    if (maxColL == -1) {
    945       /* all transparent black */
    946       cc[0] = ~0u;
    947       for (i = 0; i < n_comp; i++) {
    948          vec[0][i] = 0;
    949          vec[1][i] = 0;
    950       }
    951    } else {
    952       cc[0] = 0;
    953       for (i = 0; i < n_comp; i++) {
    954          vec[0][i] = input[minColL][i];
    955          vec[1][i] = input[maxColL][i];
    956       }
    957       if (minColL != maxColL) {
    958          /* compute interpolation vector */
    959          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
    960 
    961          /* add in texels */
    962          lolo = 0;
    963          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
    964             GLint texel = n_vect + 1; /* transparent black */
    965             if (!ISTBLACK(input[k])) {
    966                /* interpolate color */
    967                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
    968             }
    969             /* add in texel */
    970             lolo <<= 2;
    971             lolo |= texel;
    972          }
    973          cc[0] = lolo;
    974       }
    975    }
    976 
    977    /* right microtile */
    978    if (maxColR == -1) {
    979       /* all transparent black */
    980       cc[1] = ~0u;
    981       for (i = 0; i < n_comp; i++) {
    982          vec[2][i] = 0;
    983          vec[3][i] = 0;
    984       }
    985    } else {
    986       cc[1] = 0;
    987       for (i = 0; i < n_comp; i++) {
    988          vec[2][i] = input[minColR][i];
    989          vec[3][i] = input[maxColR][i];
    990       }
    991       if (minColR != maxColR) {
    992          /* compute interpolation vector */
    993          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
    994 
    995          /* add in texels */
    996          lohi = 0;
    997          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
    998             GLint texel = n_vect + 1; /* transparent black */
    999             if (!ISTBLACK(input[k])) {
   1000                /* interpolate color */
   1001                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
   1002             }
   1003             /* add in texel */
   1004             lohi <<= 2;
   1005             lohi |= texel;
   1006          }
   1007          cc[1] = lohi;
   1008       }
   1009    }
   1010 
   1011    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
   1012    for (j = 2 * 2 - 1; j >= 0; j--) {
   1013       for (i = 0; i < n_comp; i++) {
   1014          /* add in colors */
   1015          FX64_SHL(hi, 5);
   1016          FX64_OR32(hi, vec[j][i] >> 3);
   1017       }
   1018    }
   1019    ((Fx64 *)cc)[1] = hi;
   1020 }
   1021 
   1022 
   1023 static void
   1024 fxt1_quantize_MIXED0 (GLuint *cc,
   1025                       GLubyte input[N_TEXELS][MAX_COMP])
   1026 {
   1027    const GLint n_vect = 3; /* highest vector number in each microtile */
   1028    const GLint n_comp = 3; /* 3 components: R, G, B */
   1029    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   1030    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   1031    GLint i, j, k;
   1032    Fx64 hi; /* high quadword */
   1033    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
   1034 
   1035    GLint minColL = 0, maxColL = 0;
   1036    GLint minColR = 0, maxColR = 0;
   1037 #if 0
   1038    GLint minSum;
   1039    GLint maxSum;
   1040 
   1041    /* Our solution here is to find the darkest and brightest colors in
   1042     * the 4x4 tile and use those as the two representative colors.
   1043     * There are probably better algorithms to use (histogram-based).
   1044     */
   1045    minSum = 2000; /* big enough */
   1046    maxSum = -1; /* small enough */
   1047    for (k = 0; k < N_TEXELS / 2; k++) {
   1048       GLint sum = 0;
   1049       for (i = 0; i < n_comp; i++) {
   1050          sum += input[k][i];
   1051       }
   1052       if (minSum > sum) {
   1053          minSum = sum;
   1054          minColL = k;
   1055       }
   1056       if (maxSum < sum) {
   1057          maxSum = sum;
   1058          maxColL = k;
   1059       }
   1060    }
   1061    minSum = 2000; /* big enough */
   1062    maxSum = -1; /* small enough */
   1063    for (; k < N_TEXELS; k++) {
   1064       GLint sum = 0;
   1065       for (i = 0; i < n_comp; i++) {
   1066          sum += input[k][i];
   1067       }
   1068       if (minSum > sum) {
   1069          minSum = sum;
   1070          minColR = k;
   1071       }
   1072       if (maxSum < sum) {
   1073          maxSum = sum;
   1074          maxColR = k;
   1075       }
   1076    }
   1077 #else
   1078    GLint minVal;
   1079    GLint maxVal;
   1080    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
   1081    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
   1082 
   1083    /* Scan the channel with max variance for lo & hi
   1084     * and use those as the two representative colors.
   1085     */
   1086    minVal = 2000; /* big enough */
   1087    maxVal = -1; /* small enough */
   1088    for (k = 0; k < N_TEXELS / 2; k++) {
   1089       GLint t = input[k][maxVarL];
   1090       if (minVal > t) {
   1091          minVal = t;
   1092          minColL = k;
   1093       }
   1094       if (maxVal < t) {
   1095          maxVal = t;
   1096          maxColL = k;
   1097       }
   1098    }
   1099    minVal = 2000; /* big enough */
   1100    maxVal = -1; /* small enough */
   1101    for (; k < N_TEXELS; k++) {
   1102       GLint t = input[k][maxVarR];
   1103       if (minVal > t) {
   1104          minVal = t;
   1105          minColR = k;
   1106       }
   1107       if (maxVal < t) {
   1108          maxVal = t;
   1109          maxColR = k;
   1110       }
   1111    }
   1112 #endif
   1113 
   1114    /* left microtile */
   1115    cc[0] = 0;
   1116    for (i = 0; i < n_comp; i++) {
   1117       vec[0][i] = input[minColL][i];
   1118       vec[1][i] = input[maxColL][i];
   1119    }
   1120    if (minColL != maxColL) {
   1121       /* compute interpolation vector */
   1122       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
   1123 
   1124       /* add in texels */
   1125       lolo = 0;
   1126       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
   1127          GLint texel;
   1128          /* interpolate color */
   1129          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
   1130          /* add in texel */
   1131          lolo <<= 2;
   1132          lolo |= texel;
   1133       }
   1134 
   1135       /* funky encoding for LSB of green */
   1136       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
   1137          for (i = 0; i < n_comp; i++) {
   1138             vec[1][i] = input[minColL][i];
   1139             vec[0][i] = input[maxColL][i];
   1140          }
   1141          lolo = ~lolo;
   1142       }
   1143 
   1144       cc[0] = lolo;
   1145    }
   1146 
   1147    /* right microtile */
   1148    cc[1] = 0;
   1149    for (i = 0; i < n_comp; i++) {
   1150       vec[2][i] = input[minColR][i];
   1151       vec[3][i] = input[maxColR][i];
   1152    }
   1153    if (minColR != maxColR) {
   1154       /* compute interpolation vector */
   1155       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
   1156 
   1157       /* add in texels */
   1158       lohi = 0;
   1159       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
   1160          GLint texel;
   1161          /* interpolate color */
   1162          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
   1163          /* add in texel */
   1164          lohi <<= 2;
   1165          lohi |= texel;
   1166       }
   1167 
   1168       /* funky encoding for LSB of green */
   1169       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
   1170          for (i = 0; i < n_comp; i++) {
   1171             vec[3][i] = input[minColR][i];
   1172             vec[2][i] = input[maxColR][i];
   1173          }
   1174          lohi = ~lohi;
   1175       }
   1176 
   1177       cc[1] = lohi;
   1178    }
   1179 
   1180    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
   1181    for (j = 2 * 2 - 1; j >= 0; j--) {
   1182       for (i = 0; i < n_comp; i++) {
   1183          /* add in colors */
   1184          FX64_SHL(hi, 5);
   1185          FX64_OR32(hi, vec[j][i] >> 3);
   1186       }
   1187    }
   1188    ((Fx64 *)cc)[1] = hi;
   1189 }
   1190 
   1191 
   1192 static void
   1193 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
   1194 {
   1195    GLint trualpha;
   1196    GLubyte reord[N_TEXELS][MAX_COMP];
   1197 
   1198    GLubyte input[N_TEXELS][MAX_COMP];
   1199    GLint i, k, l;
   1200 
   1201    if (comps == 3) {
   1202       /* make the whole block opaque */
   1203       memset(input, -1, sizeof(input));
   1204    }
   1205 
   1206    /* 8 texels each line */
   1207    for (l = 0; l < 4; l++) {
   1208       for (k = 0; k < 4; k++) {
   1209          for (i = 0; i < comps; i++) {
   1210             input[k + l * 4][i] = *lines[l]++;
   1211          }
   1212       }
   1213       for (; k < 8; k++) {
   1214          for (i = 0; i < comps; i++) {
   1215             input[k + l * 4 + 12][i] = *lines[l]++;
   1216          }
   1217       }
   1218    }
   1219 
   1220    /* block layout:
   1221     * 00, 01, 02, 03, 08, 09, 0a, 0b
   1222     * 10, 11, 12, 13, 18, 19, 1a, 1b
   1223     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
   1224     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
   1225     */
   1226 
   1227    /* [dBorca]
   1228     * stupidity flows forth from this
   1229     */
   1230    l = N_TEXELS;
   1231    trualpha = 0;
   1232    if (comps == 4) {
   1233       /* skip all transparent black texels */
   1234       l = 0;
   1235       for (k = 0; k < N_TEXELS; k++) {
   1236          /* test all components against 0 */
   1237          if (!ISTBLACK(input[k])) {
   1238             /* texel is not transparent black */
   1239             COPY_4UBV(reord[l], input[k]);
   1240             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
   1241                /* non-opaque texel */
   1242                trualpha = !0;
   1243             }
   1244             l++;
   1245          }
   1246       }
   1247    }
   1248 
   1249 #if 0
   1250    if (trualpha) {
   1251       fxt1_quantize_ALPHA0(cc, input, reord, l);
   1252    } else if (l == 0) {
   1253       cc[0] = cc[1] = cc[2] = -1;
   1254       cc[3] = 0;
   1255    } else if (l < N_TEXELS) {
   1256       fxt1_quantize_HI(cc, input, reord, l);
   1257    } else {
   1258       fxt1_quantize_CHROMA(cc, input);
   1259    }
   1260    (void)fxt1_quantize_ALPHA1;
   1261    (void)fxt1_quantize_MIXED1;
   1262    (void)fxt1_quantize_MIXED0;
   1263 #else
   1264    if (trualpha) {
   1265       fxt1_quantize_ALPHA1(cc, input);
   1266    } else if (l == 0) {
   1267       cc[0] = cc[1] = cc[2] = ~0u;
   1268       cc[3] = 0;
   1269    } else if (l < N_TEXELS) {
   1270       fxt1_quantize_MIXED1(cc, input);
   1271    } else {
   1272       fxt1_quantize_MIXED0(cc, input);
   1273    }
   1274    (void)fxt1_quantize_ALPHA0;
   1275    (void)fxt1_quantize_HI;
   1276    (void)fxt1_quantize_CHROMA;
   1277 #endif
   1278 }
   1279 
   1280 
   1281 
   1282 /**
   1283  * Upscale an image by replication, not (typical) stretching.
   1284  * We use this when the image width or height is less than a
   1285  * certain size (4, 8) and we need to upscale an image.
   1286  */
   1287 static void
   1288 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
   1289                    GLsizei outWidth, GLsizei outHeight,
   1290                    GLint comps, const GLubyte *src, GLint srcRowStride,
   1291                    GLubyte *dest )
   1292 {
   1293    GLint i, j, k;
   1294 
   1295    ASSERT(outWidth >= inWidth);
   1296    ASSERT(outHeight >= inHeight);
   1297 #if 0
   1298    ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
   1299    ASSERT((outWidth & 3) == 0);
   1300    ASSERT((outHeight & 3) == 0);
   1301 #endif
   1302 
   1303    for (i = 0; i < outHeight; i++) {
   1304       const GLint ii = i % inHeight;
   1305       for (j = 0; j < outWidth; j++) {
   1306          const GLint jj = j % inWidth;
   1307          for (k = 0; k < comps; k++) {
   1308             dest[(i * outWidth + j) * comps + k]
   1309                = src[ii * srcRowStride + jj * comps + k];
   1310          }
   1311       }
   1312    }
   1313 }
   1314 
   1315 
   1316 static void
   1317 fxt1_encode (GLuint width, GLuint height, GLint comps,
   1318              const void *source, GLint srcRowStride,
   1319              void *dest, GLint destRowStride)
   1320 {
   1321    GLuint x, y;
   1322    const GLubyte *data;
   1323    GLuint *encoded = (GLuint *)dest;
   1324    void *newSource = NULL;
   1325 
   1326    assert(comps == 3 || comps == 4);
   1327 
   1328    /* Replicate image if width is not M8 or height is not M4 */
   1329    if ((width & 7) | (height & 3)) {
   1330       GLint newWidth = (width + 7) & ~7;
   1331       GLint newHeight = (height + 3) & ~3;
   1332       newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
   1333       if (!newSource) {
   1334          GET_CURRENT_CONTEXT(ctx);
   1335          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
   1336          goto cleanUp;
   1337       }
   1338       upscale_teximage2d(width, height, newWidth, newHeight,
   1339                          comps, (const GLubyte *) source,
   1340                          srcRowStride, (GLubyte *) newSource);
   1341       source = newSource;
   1342       width = newWidth;
   1343       height = newHeight;
   1344       srcRowStride = comps * newWidth;
   1345    }
   1346 
   1347    data = (const GLubyte *) source;
   1348    destRowStride = (destRowStride - width * 2) / 4;
   1349    for (y = 0; y < height; y += 4) {
   1350       GLuint offs = 0 + (y + 0) * srcRowStride;
   1351       for (x = 0; x < width; x += 8) {
   1352          const GLubyte *lines[4];
   1353          lines[0] = &data[offs];
   1354          lines[1] = lines[0] + srcRowStride;
   1355          lines[2] = lines[1] + srcRowStride;
   1356          lines[3] = lines[2] + srcRowStride;
   1357          offs += 8 * comps;
   1358          fxt1_quantize(encoded, lines, comps);
   1359          /* 128 bits per 8x4 block */
   1360          encoded += 4;
   1361       }
   1362       encoded += destRowStride;
   1363    }
   1364 
   1365  cleanUp:
   1366    if (newSource != NULL) {
   1367       free(newSource);
   1368    }
   1369 }
   1370 
   1371 
   1372 /***************************************************************************\
   1373  * FXT1 decoder
   1374  *
   1375  * The decoder is based on GL_3DFX_texture_compression_FXT1
   1376  * specification and serves as a concept for the encoder.
   1377 \***************************************************************************/
   1378 
   1379 
   1380 /* lookup table for scaling 5 bit colors up to 8 bits */
   1381 static const GLubyte _rgb_scale_5[] = {
   1382    0,   8,   16,  25,  33,  41,  49,  58,
   1383    66,  74,  82,  90,  99,  107, 115, 123,
   1384    132, 140, 148, 156, 165, 173, 181, 189,
   1385    197, 206, 214, 222, 230, 239, 247, 255
   1386 };
   1387 
   1388 /* lookup table for scaling 6 bit colors up to 8 bits */
   1389 static const GLubyte _rgb_scale_6[] = {
   1390    0,   4,   8,   12,  16,  20,  24,  28,
   1391    32,  36,  40,  45,  49,  53,  57,  61,
   1392    65,  69,  73,  77,  81,  85,  89,  93,
   1393    97,  101, 105, 109, 113, 117, 121, 125,
   1394    130, 134, 138, 142, 146, 150, 154, 158,
   1395    162, 166, 170, 174, 178, 182, 186, 190,
   1396    194, 198, 202, 206, 210, 215, 219, 223,
   1397    227, 231, 235, 239, 243, 247, 251, 255
   1398 };
   1399 
   1400 
   1401 #define CC_SEL(cc, which) (((GLuint *)(cc))[(which) / 32] >> ((which) & 31))
   1402 #define UP5(c) _rgb_scale_5[(c) & 31]
   1403 #define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
   1404 #define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n)
   1405 
   1406 
   1407 static void
   1408 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
   1409 {
   1410    const GLuint *cc;
   1411 
   1412    t *= 3;
   1413    cc = (const GLuint *)(code + t / 8);
   1414    t = (cc[0] >> (t & 7)) & 7;
   1415 
   1416    if (t == 7) {
   1417       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
   1418    } else {
   1419       GLubyte r, g, b;
   1420       cc = (const GLuint *)(code + 12);
   1421       if (t == 0) {
   1422          b = UP5(CC_SEL(cc, 0));
   1423          g = UP5(CC_SEL(cc, 5));
   1424          r = UP5(CC_SEL(cc, 10));
   1425       } else if (t == 6) {
   1426          b = UP5(CC_SEL(cc, 15));
   1427          g = UP5(CC_SEL(cc, 20));
   1428          r = UP5(CC_SEL(cc, 25));
   1429       } else {
   1430          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
   1431          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
   1432          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
   1433       }
   1434       rgba[RCOMP] = r;
   1435       rgba[GCOMP] = g;
   1436       rgba[BCOMP] = b;
   1437       rgba[ACOMP] = 255;
   1438    }
   1439 }
   1440 
   1441 
   1442 static void
   1443 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
   1444 {
   1445    const GLuint *cc;
   1446    GLuint kk;
   1447 
   1448    cc = (const GLuint *)code;
   1449    if (t & 16) {
   1450       cc++;
   1451       t &= 15;
   1452    }
   1453    t = (cc[0] >> (t * 2)) & 3;
   1454 
   1455    t *= 15;
   1456    cc = (const GLuint *)(code + 8 + t / 8);
   1457    kk = cc[0] >> (t & 7);
   1458    rgba[BCOMP] = UP5(kk);
   1459    rgba[GCOMP] = UP5(kk >> 5);
   1460    rgba[RCOMP] = UP5(kk >> 10);
   1461    rgba[ACOMP] = 255;
   1462 }
   1463 
   1464 
   1465 static void
   1466 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
   1467 {
   1468    const GLuint *cc;
   1469    GLuint col[2][3];
   1470    GLint glsb, selb;
   1471 
   1472    cc = (const GLuint *)code;
   1473    if (t & 16) {
   1474       t &= 15;
   1475       t = (cc[1] >> (t * 2)) & 3;
   1476       /* col 2 */
   1477       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
   1478       col[0][GCOMP] = CC_SEL(cc, 99);
   1479       col[0][RCOMP] = CC_SEL(cc, 104);
   1480       /* col 3 */
   1481       col[1][BCOMP] = CC_SEL(cc, 109);
   1482       col[1][GCOMP] = CC_SEL(cc, 114);
   1483       col[1][RCOMP] = CC_SEL(cc, 119);
   1484       glsb = CC_SEL(cc, 126);
   1485       selb = CC_SEL(cc, 33);
   1486    } else {
   1487       t = (cc[0] >> (t * 2)) & 3;
   1488       /* col 0 */
   1489       col[0][BCOMP] = CC_SEL(cc, 64);
   1490       col[0][GCOMP] = CC_SEL(cc, 69);
   1491       col[0][RCOMP] = CC_SEL(cc, 74);
   1492       /* col 1 */
   1493       col[1][BCOMP] = CC_SEL(cc, 79);
   1494       col[1][GCOMP] = CC_SEL(cc, 84);
   1495       col[1][RCOMP] = CC_SEL(cc, 89);
   1496       glsb = CC_SEL(cc, 125);
   1497       selb = CC_SEL(cc, 1);
   1498    }
   1499 
   1500    if (CC_SEL(cc, 124) & 1) {
   1501       /* alpha[0] == 1 */
   1502 
   1503       if (t == 3) {
   1504          /* zero */
   1505          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
   1506       } else {
   1507          GLubyte r, g, b;
   1508          if (t == 0) {
   1509             b = UP5(col[0][BCOMP]);
   1510             g = UP5(col[0][GCOMP]);
   1511             r = UP5(col[0][RCOMP]);
   1512          } else if (t == 2) {
   1513             b = UP5(col[1][BCOMP]);
   1514             g = UP6(col[1][GCOMP], glsb);
   1515             r = UP5(col[1][RCOMP]);
   1516          } else {
   1517             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
   1518             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
   1519             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
   1520          }
   1521          rgba[RCOMP] = r;
   1522          rgba[GCOMP] = g;
   1523          rgba[BCOMP] = b;
   1524          rgba[ACOMP] = 255;
   1525       }
   1526    } else {
   1527       /* alpha[0] == 0 */
   1528       GLubyte r, g, b;
   1529       if (t == 0) {
   1530          b = UP5(col[0][BCOMP]);
   1531          g = UP6(col[0][GCOMP], glsb ^ selb);
   1532          r = UP5(col[0][RCOMP]);
   1533       } else if (t == 3) {
   1534          b = UP5(col[1][BCOMP]);
   1535          g = UP6(col[1][GCOMP], glsb);
   1536          r = UP5(col[1][RCOMP]);
   1537       } else {
   1538          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
   1539          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
   1540                         UP6(col[1][GCOMP], glsb));
   1541          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
   1542       }
   1543       rgba[RCOMP] = r;
   1544       rgba[GCOMP] = g;
   1545       rgba[BCOMP] = b;
   1546       rgba[ACOMP] = 255;
   1547    }
   1548 }
   1549 
   1550 
   1551 static void
   1552 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
   1553 {
   1554    const GLuint *cc;
   1555    GLubyte r, g, b, a;
   1556 
   1557    cc = (const GLuint *)code;
   1558    if (CC_SEL(cc, 124) & 1) {
   1559       /* lerp == 1 */
   1560       GLuint col0[4];
   1561 
   1562       if (t & 16) {
   1563          t &= 15;
   1564          t = (cc[1] >> (t * 2)) & 3;
   1565          /* col 2 */
   1566          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
   1567          col0[GCOMP] = CC_SEL(cc, 99);
   1568          col0[RCOMP] = CC_SEL(cc, 104);
   1569          col0[ACOMP] = CC_SEL(cc, 119);
   1570       } else {
   1571          t = (cc[0] >> (t * 2)) & 3;
   1572          /* col 0 */
   1573          col0[BCOMP] = CC_SEL(cc, 64);
   1574          col0[GCOMP] = CC_SEL(cc, 69);
   1575          col0[RCOMP] = CC_SEL(cc, 74);
   1576          col0[ACOMP] = CC_SEL(cc, 109);
   1577       }
   1578 
   1579       if (t == 0) {
   1580          b = UP5(col0[BCOMP]);
   1581          g = UP5(col0[GCOMP]);
   1582          r = UP5(col0[RCOMP]);
   1583          a = UP5(col0[ACOMP]);
   1584       } else if (t == 3) {
   1585          b = UP5(CC_SEL(cc, 79));
   1586          g = UP5(CC_SEL(cc, 84));
   1587          r = UP5(CC_SEL(cc, 89));
   1588          a = UP5(CC_SEL(cc, 114));
   1589       } else {
   1590          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
   1591          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
   1592          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
   1593          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
   1594       }
   1595    } else {
   1596       /* lerp == 0 */
   1597 
   1598       if (t & 16) {
   1599          cc++;
   1600          t &= 15;
   1601       }
   1602       t = (cc[0] >> (t * 2)) & 3;
   1603 
   1604       if (t == 3) {
   1605          /* zero */
   1606          r = g = b = a = 0;
   1607       } else {
   1608          GLuint kk;
   1609          cc = (const GLuint *)code;
   1610          a = UP5(cc[3] >> (t * 5 + 13));
   1611          t *= 15;
   1612          cc = (const GLuint *)(code + 8 + t / 8);
   1613          kk = cc[0] >> (t & 7);
   1614          b = UP5(kk);
   1615          g = UP5(kk >> 5);
   1616          r = UP5(kk >> 10);
   1617       }
   1618    }
   1619    rgba[RCOMP] = r;
   1620    rgba[GCOMP] = g;
   1621    rgba[BCOMP] = b;
   1622    rgba[ACOMP] = a;
   1623 }
   1624 
   1625 
   1626 void
   1627 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
   1628                GLint i, GLint j, GLubyte *rgba)
   1629 {
   1630    static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
   1631       fxt1_decode_1HI,     /* cc-high   = "00?" */
   1632       fxt1_decode_1HI,     /* cc-high   = "00?" */
   1633       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
   1634       fxt1_decode_1ALPHA,  /* alpha     = "011" */
   1635       fxt1_decode_1MIXED,  /* mixed     = "1??" */
   1636       fxt1_decode_1MIXED,  /* mixed     = "1??" */
   1637       fxt1_decode_1MIXED,  /* mixed     = "1??" */
   1638       fxt1_decode_1MIXED   /* mixed     = "1??" */
   1639    };
   1640 
   1641    const GLubyte *code = (const GLubyte *)texture +
   1642                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
   1643    GLint mode = CC_SEL(code, 125);
   1644    GLint t = i & 7;
   1645 
   1646    if (t & 4) {
   1647       t += 12;
   1648    }
   1649    t += (j & 3) * 4;
   1650 
   1651    decode_1[mode](code, t, rgba);
   1652 }
   1653 
   1654 
   1655 #endif /* FEATURE_texture_fxt1 */
   1656