Home | History | Annotate | Download | only in main
      1 /*
      2  * Mesa 3-D graphics library
      3  *
      4  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included
     14  * in all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     22  * OTHER DEALINGS IN THE SOFTWARE.
     23  */
     24 
     25 
     26 /**
     27  * \file texcompress_fxt1.c
     28  * GL_3DFX_texture_compression_FXT1 support.
     29  */
     30 
     31 
     32 #include "glheader.h"
     33 #include "imports.h"
     34 #include "image.h"
     35 #include "macros.h"
     36 #include "mipmap.h"
     37 #include "texcompress.h"
     38 #include "texcompress_fxt1.h"
     39 #include "texstore.h"
     40 
     41 
     42 static void
     43 fxt1_encode (GLuint width, GLuint height, GLint comps,
     44              const void *source, GLint srcRowStride,
     45              void *dest, GLint destRowStride);
     46 
     47 static void
     48 fxt1_decode_1 (const void *texture, GLint stride,
     49                GLint i, GLint j, GLubyte *rgba);
     50 
     51 
     52 /**
     53  * Store user's image in rgb_fxt1 format.
     54  */
     55 GLboolean
     56 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
     57 {
     58    const GLubyte *pixels;
     59    GLint srcRowStride;
     60    GLubyte *dst;
     61    const GLubyte *tempImage = NULL;
     62 
     63    assert(dstFormat == MESA_FORMAT_RGB_FXT1);
     64 
     65    if (srcFormat != GL_RGB ||
     66        srcType != GL_UNSIGNED_BYTE ||
     67        ctx->_ImageTransferState ||
     68        ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
     69        srcPacking->SwapBytes) {
     70       /* convert image to RGB/GLubyte */
     71       GLubyte *tempImageSlices[1];
     72       int rgbRowStride = 3 * srcWidth * sizeof(GLubyte);
     73       tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte));
     74       if (!tempImage)
     75          return GL_FALSE; /* out of memory */
     76       tempImageSlices[0] = (GLubyte *) tempImage;
     77       _mesa_texstore(ctx, dims,
     78                      baseInternalFormat,
     79                      MESA_FORMAT_RGB_UNORM8,
     80                      rgbRowStride, tempImageSlices,
     81                      srcWidth, srcHeight, srcDepth,
     82                      srcFormat, srcType, srcAddr,
     83                      srcPacking);
     84       pixels = tempImage;
     85       srcRowStride = 3 * srcWidth;
     86       srcFormat = GL_RGB;
     87    }
     88    else {
     89       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
     90                                      srcFormat, srcType, 0, 0);
     91 
     92       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
     93                                             srcType) / sizeof(GLubyte);
     94    }
     95 
     96    dst = dstSlices[0];
     97 
     98    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
     99                dst, dstRowStride);
    100 
    101    free((void*) tempImage);
    102 
    103    return GL_TRUE;
    104 }
    105 
    106 
    107 /**
    108  * Store user's image in rgba_fxt1 format.
    109  */
    110 GLboolean
    111 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
    112 {
    113    const GLubyte *pixels;
    114    GLint srcRowStride;
    115    GLubyte *dst;
    116    const GLubyte *tempImage = NULL;
    117 
    118    assert(dstFormat == MESA_FORMAT_RGBA_FXT1);
    119 
    120    if (srcFormat != GL_RGBA ||
    121        srcType != GL_UNSIGNED_BYTE ||
    122        ctx->_ImageTransferState ||
    123        srcPacking->SwapBytes) {
    124       /* convert image to RGBA/GLubyte */
    125       GLubyte *tempImageSlices[1];
    126       int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
    127       tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
    128       if (!tempImage)
    129          return GL_FALSE; /* out of memory */
    130       tempImageSlices[0] = (GLubyte *) tempImage;
    131       _mesa_texstore(ctx, dims,
    132                      baseInternalFormat,
    133                      _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
    134                                            : MESA_FORMAT_A8B8G8R8_UNORM,
    135                      rgbaRowStride, tempImageSlices,
    136                      srcWidth, srcHeight, srcDepth,
    137                      srcFormat, srcType, srcAddr,
    138                      srcPacking);
    139       pixels = tempImage;
    140       srcRowStride = 4 * srcWidth;
    141       srcFormat = GL_RGBA;
    142    }
    143    else {
    144       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
    145                                      srcFormat, srcType, 0, 0);
    146 
    147       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
    148                                             srcType) / sizeof(GLubyte);
    149    }
    150 
    151    dst = dstSlices[0];
    152 
    153    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
    154                dst, dstRowStride);
    155 
    156    free((void*) tempImage);
    157 
    158    return GL_TRUE;
    159 }
    160 
    161 
    162 /***************************************************************************\
    163  * FXT1 encoder
    164  *
    165  * The encoder was built by reversing the decoder,
    166  * and is vaguely based on Texus2 by 3dfx. Note that this code
    167  * is merely a proof of concept, since it is highly UNoptimized;
    168  * moreover, it is sub-optimal due to initial conditions passed
    169  * to Lloyd's algorithm (the interpolation modes are even worse).
    170 \***************************************************************************/
    171 
    172 
    173 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
    174 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
    175 #define N_TEXELS 32 /* number of texels in a block (always 32) */
    176 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
    177 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
    178 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
    179 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
    180 static const GLuint zero = 0;
    181 #define ISTBLACK(v) (memcmp(&(v), &zero, sizeof(zero)) == 0)
    182 
    183 /*
    184  * Define a 64-bit unsigned integer type and macros
    185  */
    186 #if 1
    187 
    188 #define FX64_NATIVE 1
    189 
    190 typedef uint64_t Fx64;
    191 
    192 #define FX64_MOV32(a, b) a = b
    193 #define FX64_OR32(a, b)  a |= b
    194 #define FX64_SHL(a, c)   a <<= c
    195 
    196 #else
    197 
    198 #define FX64_NATIVE 0
    199 
    200 typedef struct {
    201    GLuint lo, hi;
    202 } Fx64;
    203 
    204 #define FX64_MOV32(a, b) a.lo = b
    205 #define FX64_OR32(a, b)  a.lo |= b
    206 
    207 #define FX64_SHL(a, c)                                 \
    208    do {                                                \
    209        if ((c) >= 32) {                                \
    210           a.hi = a.lo << ((c) - 32);                   \
    211           a.lo = 0;                                    \
    212        } else {                                        \
    213           a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
    214           a.lo <<= (c);                                \
    215        }                                               \
    216    } while (0)
    217 
    218 #endif
    219 
    220 
    221 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
    222 #define SAFECDOT 1 /* for paranoids */
    223 
    224 #define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
    225    do {                                  \
    226       /* compute interpolation vector */ \
    227       GLfloat d2 = 0.0F;                 \
    228       GLfloat rd2;                       \
    229                                          \
    230       for (i = 0; i < NC; i++) {         \
    231          IV[i] = (V1[i] - V0[i]) * F(i); \
    232          d2 += IV[i] * IV[i];            \
    233       }                                  \
    234       rd2 = (GLfloat)NV / d2;            \
    235       B = 0;                             \
    236       for (i = 0; i < NC; i++) {         \
    237          IV[i] *= F(i);                  \
    238          B -= IV[i] * V0[i];             \
    239          IV[i] *= rd2;                   \
    240       }                                  \
    241       B = B * rd2 + 0.5f;                \
    242    } while (0)
    243 
    244 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
    245    do {                                  \
    246       GLfloat dot = 0.0F;                \
    247       for (i = 0; i < NC; i++) {         \
    248          dot += V[i] * IV[i];            \
    249       }                                  \
    250       TEXEL = (GLint)(dot + B);          \
    251       if (SAFECDOT) {                    \
    252          if (TEXEL < 0) {                \
    253             TEXEL = 0;                   \
    254          } else if (TEXEL > NV) {        \
    255             TEXEL = NV;                  \
    256          }                               \
    257       }                                  \
    258    } while (0)
    259 
    260 
    261 static GLint
    262 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
    263               GLubyte input[MAX_COMP], GLint nc)
    264 {
    265    GLint i, j, best = -1;
    266    GLfloat err = 1e9; /* big enough */
    267 
    268    for (j = 0; j < nv; j++) {
    269       GLfloat e = 0.0F;
    270       for (i = 0; i < nc; i++) {
    271          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
    272       }
    273       if (e < err) {
    274          err = e;
    275          best = j;
    276       }
    277    }
    278 
    279    return best;
    280 }
    281 
    282 
    283 static GLint
    284 fxt1_worst (GLfloat vec[MAX_COMP],
    285             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
    286 {
    287    GLint i, k, worst = -1;
    288    GLfloat err = -1.0F; /* small enough */
    289 
    290    for (k = 0; k < n; k++) {
    291       GLfloat e = 0.0F;
    292       for (i = 0; i < nc; i++) {
    293          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
    294       }
    295       if (e > err) {
    296          err = e;
    297          worst = k;
    298       }
    299    }
    300 
    301    return worst;
    302 }
    303 
    304 
    305 static GLint
    306 fxt1_variance (GLdouble variance[MAX_COMP],
    307                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
    308 {
    309    GLint i, k, best = 0;
    310    GLint sx, sx2;
    311    GLdouble var, maxvar = -1; /* small enough */
    312    GLdouble teenth = 1.0 / n;
    313 
    314    for (i = 0; i < nc; i++) {
    315       sx = sx2 = 0;
    316       for (k = 0; k < n; k++) {
    317          GLint t = input[k][i];
    318          sx += t;
    319          sx2 += t * t;
    320       }
    321       var = sx2 * teenth - sx * sx * teenth * teenth;
    322       if (maxvar < var) {
    323          maxvar = var;
    324          best = i;
    325       }
    326       if (variance) {
    327          variance[i] = var;
    328       }
    329    }
    330 
    331    return best;
    332 }
    333 
    334 
    335 static GLint
    336 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
    337              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
    338 {
    339 #if 0
    340    /* Choose colors from a grid.
    341     */
    342    GLint i, j;
    343 
    344    for (j = 0; j < nv; j++) {
    345       GLint m = j * (n - 1) / (nv - 1);
    346       for (i = 0; i < nc; i++) {
    347          vec[j][i] = input[m][i];
    348       }
    349    }
    350 #else
    351    /* Our solution here is to find the darkest and brightest colors in
    352     * the 8x4 tile and use those as the two representative colors.
    353     * There are probably better algorithms to use (histogram-based).
    354     */
    355    GLint i, j, k;
    356    GLint minSum = 2000; /* big enough */
    357    GLint maxSum = -1; /* small enough */
    358    GLint minCol = 0; /* phoudoin: silent compiler! */
    359    GLint maxCol = 0; /* phoudoin: silent compiler! */
    360 
    361    struct {
    362       GLint flag;
    363       GLint key;
    364       GLint freq;
    365       GLint idx;
    366    } hist[N_TEXELS];
    367    GLint lenh = 0;
    368 
    369    memset(hist, 0, sizeof(hist));
    370 
    371    for (k = 0; k < n; k++) {
    372       GLint l;
    373       GLint key = 0;
    374       GLint sum = 0;
    375       for (i = 0; i < nc; i++) {
    376          key <<= 8;
    377          key |= input[k][i];
    378          sum += input[k][i];
    379       }
    380       for (l = 0; l < n; l++) {
    381          if (!hist[l].flag) {
    382             /* alloc new slot */
    383             hist[l].flag = !0;
    384             hist[l].key = key;
    385             hist[l].freq = 1;
    386             hist[l].idx = k;
    387             lenh = l + 1;
    388             break;
    389          } else if (hist[l].key == key) {
    390             hist[l].freq++;
    391             break;
    392          }
    393       }
    394       if (minSum > sum) {
    395          minSum = sum;
    396          minCol = k;
    397       }
    398       if (maxSum < sum) {
    399          maxSum = sum;
    400          maxCol = k;
    401       }
    402    }
    403 
    404    if (lenh <= nv) {
    405       for (j = 0; j < lenh; j++) {
    406          for (i = 0; i < nc; i++) {
    407             vec[j][i] = (GLfloat)input[hist[j].idx][i];
    408          }
    409       }
    410       for (; j < nv; j++) {
    411          for (i = 0; i < nc; i++) {
    412             vec[j][i] = vec[0][i];
    413          }
    414       }
    415       return 0;
    416    }
    417 
    418    for (j = 0; j < nv; j++) {
    419       for (i = 0; i < nc; i++) {
    420          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
    421       }
    422    }
    423 #endif
    424 
    425    return !0;
    426 }
    427 
    428 
    429 static GLint
    430 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
    431             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
    432 {
    433    /* Use the generalized lloyd's algorithm for VQ:
    434     *     find 4 color vectors.
    435     *
    436     *     for each sample color
    437     *         sort to nearest vector.
    438     *
    439     *     replace each vector with the centroid of its matching colors.
    440     *
    441     *     repeat until RMS doesn't improve.
    442     *
    443     *     if a color vector has no samples, or becomes the same as another
    444     *     vector, replace it with the color which is farthest from a sample.
    445     *
    446     * vec[][MAX_COMP]           initial vectors and resulting colors
    447     * nv                        number of resulting colors required
    448     * input[N_TEXELS][MAX_COMP] input texels
    449     * nc                        number of components in input / vec
    450     * n                         number of input samples
    451     */
    452 
    453    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
    454    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
    455    GLfloat error, lasterror = 1e9;
    456 
    457    GLint i, j, k, rep;
    458 
    459    /* the quantizer */
    460    for (rep = 0; rep < LL_N_REP; rep++) {
    461       /* reset sums & counters */
    462       for (j = 0; j < nv; j++) {
    463          for (i = 0; i < nc; i++) {
    464             sum[j][i] = 0;
    465          }
    466          cnt[j] = 0;
    467       }
    468       error = 0;
    469 
    470       /* scan whole block */
    471       for (k = 0; k < n; k++) {
    472 #if 1
    473          GLint best = -1;
    474          GLfloat err = 1e9; /* big enough */
    475          /* determine best vector */
    476          for (j = 0; j < nv; j++) {
    477             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
    478                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
    479                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
    480             if (nc == 4) {
    481                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
    482             }
    483             if (e < err) {
    484                err = e;
    485                best = j;
    486             }
    487          }
    488 #else
    489          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
    490 #endif
    491          assert(best >= 0);
    492          /* add in closest color */
    493          for (i = 0; i < nc; i++) {
    494             sum[best][i] += input[k][i];
    495          }
    496          /* mark this vector as used */
    497          cnt[best]++;
    498          /* accumulate error */
    499          error += err;
    500       }
    501 
    502       /* check RMS */
    503       if ((error < LL_RMS_E) ||
    504           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
    505          return !0; /* good match */
    506       }
    507       lasterror = error;
    508 
    509       /* move each vector to the barycenter of its closest colors */
    510       for (j = 0; j < nv; j++) {
    511          if (cnt[j]) {
    512             GLfloat div = 1.0F / cnt[j];
    513             for (i = 0; i < nc; i++) {
    514                vec[j][i] = div * sum[j][i];
    515             }
    516          } else {
    517             /* this vec has no samples or is identical with a previous vec */
    518             GLint worst = fxt1_worst(vec[j], input, nc, n);
    519             for (i = 0; i < nc; i++) {
    520                vec[j][i] = input[worst][i];
    521             }
    522          }
    523       }
    524    }
    525 
    526    return 0; /* could not converge fast enough */
    527 }
    528 
    529 
    530 static void
    531 fxt1_quantize_CHROMA (GLuint *cc,
    532                       GLubyte input[N_TEXELS][MAX_COMP])
    533 {
    534    const GLint n_vect = 4; /* 4 base vectors to find */
    535    const GLint n_comp = 3; /* 3 components: R, G, B */
    536    GLfloat vec[MAX_VECT][MAX_COMP];
    537    GLint i, j, k;
    538    Fx64 hi; /* high quadword */
    539    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
    540 
    541    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
    542       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
    543    }
    544 
    545    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
    546    for (j = n_vect - 1; j >= 0; j--) {
    547       for (i = 0; i < n_comp; i++) {
    548          /* add in colors */
    549          FX64_SHL(hi, 5);
    550          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
    551       }
    552    }
    553    ((Fx64 *)cc)[1] = hi;
    554 
    555    lohi = lolo = 0;
    556    /* right microtile */
    557    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
    558       lohi <<= 2;
    559       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
    560    }
    561    /* left microtile */
    562    for (; k >= 0; k--) {
    563       lolo <<= 2;
    564       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
    565    }
    566    cc[1] = lohi;
    567    cc[0] = lolo;
    568 }
    569 
    570 
    571 static void
    572 fxt1_quantize_ALPHA0 (GLuint *cc,
    573                       GLubyte input[N_TEXELS][MAX_COMP],
    574                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
    575 {
    576    const GLint n_vect = 3; /* 3 base vectors to find */
    577    const GLint n_comp = 4; /* 4 components: R, G, B, A */
    578    GLfloat vec[MAX_VECT][MAX_COMP];
    579    GLint i, j, k;
    580    Fx64 hi; /* high quadword */
    581    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
    582 
    583    /* the last vector indicates zero */
    584    for (i = 0; i < n_comp; i++) {
    585       vec[n_vect][i] = 0;
    586    }
    587 
    588    /* the first n texels in reord are guaranteed to be non-zero */
    589    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
    590       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
    591    }
    592 
    593    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
    594    for (j = n_vect - 1; j >= 0; j--) {
    595       /* add in alphas */
    596       FX64_SHL(hi, 5);
    597       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
    598    }
    599    for (j = n_vect - 1; j >= 0; j--) {
    600       for (i = 0; i < n_comp - 1; i++) {
    601          /* add in colors */
    602          FX64_SHL(hi, 5);
    603          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
    604       }
    605    }
    606    ((Fx64 *)cc)[1] = hi;
    607 
    608    lohi = lolo = 0;
    609    /* right microtile */
    610    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
    611       lohi <<= 2;
    612       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
    613    }
    614    /* left microtile */
    615    for (; k >= 0; k--) {
    616       lolo <<= 2;
    617       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
    618    }
    619    cc[1] = lohi;
    620    cc[0] = lolo;
    621 }
    622 
    623 
    624 static void
    625 fxt1_quantize_ALPHA1 (GLuint *cc,
    626                       GLubyte input[N_TEXELS][MAX_COMP])
    627 {
    628    const GLint n_vect = 3; /* highest vector number in each microtile */
    629    const GLint n_comp = 4; /* 4 components: R, G, B, A */
    630    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
    631    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
    632    GLint i, j, k;
    633    Fx64 hi; /* high quadword */
    634    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
    635 
    636    GLint minSum;
    637    GLint maxSum;
    638    GLint minColL = 0, maxColL = 0;
    639    GLint minColR = 0, maxColR = 0;
    640    GLint sumL = 0, sumR = 0;
    641    GLint nn_comp;
    642    /* Our solution here is to find the darkest and brightest colors in
    643     * the 4x4 tile and use those as the two representative colors.
    644     * There are probably better algorithms to use (histogram-based).
    645     */
    646    nn_comp = n_comp;
    647    while ((minColL == maxColL) && nn_comp) {
    648        minSum = 2000; /* big enough */
    649        maxSum = -1; /* small enough */
    650        for (k = 0; k < N_TEXELS / 2; k++) {
    651            GLint sum = 0;
    652            for (i = 0; i < nn_comp; i++) {
    653                sum += input[k][i];
    654            }
    655            if (minSum > sum) {
    656                minSum = sum;
    657                minColL = k;
    658            }
    659            if (maxSum < sum) {
    660                maxSum = sum;
    661                maxColL = k;
    662            }
    663            sumL += sum;
    664        }
    665 
    666        nn_comp--;
    667    }
    668 
    669    nn_comp = n_comp;
    670    while ((minColR == maxColR) && nn_comp) {
    671        minSum = 2000; /* big enough */
    672        maxSum = -1; /* small enough */
    673        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
    674            GLint sum = 0;
    675            for (i = 0; i < nn_comp; i++) {
    676                sum += input[k][i];
    677            }
    678            if (minSum > sum) {
    679                minSum = sum;
    680                minColR = k;
    681            }
    682            if (maxSum < sum) {
    683                maxSum = sum;
    684                maxColR = k;
    685            }
    686            sumR += sum;
    687        }
    688 
    689        nn_comp--;
    690    }
    691 
    692    /* choose the common vector (yuck!) */
    693    {
    694       GLint j1, j2;
    695       GLint v1 = 0, v2 = 0;
    696       GLfloat err = 1e9; /* big enough */
    697       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
    698       for (i = 0; i < n_comp; i++) {
    699          tv[0][i] = input[minColL][i];
    700          tv[1][i] = input[maxColL][i];
    701          tv[2][i] = input[minColR][i];
    702          tv[3][i] = input[maxColR][i];
    703       }
    704       for (j1 = 0; j1 < 2; j1++) {
    705          for (j2 = 2; j2 < 4; j2++) {
    706             GLfloat e = 0.0F;
    707             for (i = 0; i < n_comp; i++) {
    708                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
    709             }
    710             if (e < err) {
    711                err = e;
    712                v1 = j1;
    713                v2 = j2;
    714             }
    715          }
    716       }
    717       for (i = 0; i < n_comp; i++) {
    718          vec[0][i] = tv[1 - v1][i];
    719          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
    720          vec[2][i] = tv[5 - v2][i];
    721       }
    722    }
    723 
    724    /* left microtile */
    725    cc[0] = 0;
    726    if (minColL != maxColL) {
    727       /* compute interpolation vector */
    728       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
    729 
    730       /* add in texels */
    731       lolo = 0;
    732       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
    733          GLint texel;
    734          /* interpolate color */
    735          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
    736          /* add in texel */
    737          lolo <<= 2;
    738          lolo |= texel;
    739       }
    740 
    741       cc[0] = lolo;
    742    }
    743 
    744    /* right microtile */
    745    cc[1] = 0;
    746    if (minColR != maxColR) {
    747       /* compute interpolation vector */
    748       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
    749 
    750       /* add in texels */
    751       lohi = 0;
    752       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
    753          GLint texel;
    754          /* interpolate color */
    755          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
    756          /* add in texel */
    757          lohi <<= 2;
    758          lohi |= texel;
    759       }
    760 
    761       cc[1] = lohi;
    762    }
    763 
    764    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
    765    for (j = n_vect - 1; j >= 0; j--) {
    766       /* add in alphas */
    767       FX64_SHL(hi, 5);
    768       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
    769    }
    770    for (j = n_vect - 1; j >= 0; j--) {
    771       for (i = 0; i < n_comp - 1; i++) {
    772          /* add in colors */
    773          FX64_SHL(hi, 5);
    774          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
    775       }
    776    }
    777    ((Fx64 *)cc)[1] = hi;
    778 }
    779 
    780 
    781 static void
    782 fxt1_quantize_HI (GLuint *cc,
    783                   GLubyte input[N_TEXELS][MAX_COMP],
    784                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
    785 {
    786    const GLint n_vect = 6; /* highest vector number */
    787    const GLint n_comp = 3; /* 3 components: R, G, B */
    788    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
    789    GLfloat iv[MAX_COMP];   /* interpolation vector */
    790    GLint i, k;
    791    GLuint hihi; /* high quadword: hi dword */
    792 
    793    GLint minSum = 2000; /* big enough */
    794    GLint maxSum = -1; /* small enough */
    795    GLint minCol = 0; /* phoudoin: silent compiler! */
    796    GLint maxCol = 0; /* phoudoin: silent compiler! */
    797 
    798    /* Our solution here is to find the darkest and brightest colors in
    799     * the 8x4 tile and use those as the two representative colors.
    800     * There are probably better algorithms to use (histogram-based).
    801     */
    802    for (k = 0; k < n; k++) {
    803       GLint sum = 0;
    804       for (i = 0; i < n_comp; i++) {
    805          sum += reord[k][i];
    806       }
    807       if (minSum > sum) {
    808          minSum = sum;
    809          minCol = k;
    810       }
    811       if (maxSum < sum) {
    812          maxSum = sum;
    813          maxCol = k;
    814       }
    815    }
    816 
    817    hihi = 0; /* cc-hi = "00" */
    818    for (i = 0; i < n_comp; i++) {
    819       /* add in colors */
    820       hihi <<= 5;
    821       hihi |= reord[maxCol][i] >> 3;
    822    }
    823    for (i = 0; i < n_comp; i++) {
    824       /* add in colors */
    825       hihi <<= 5;
    826       hihi |= reord[minCol][i] >> 3;
    827    }
    828    cc[3] = hihi;
    829    cc[0] = cc[1] = cc[2] = 0;
    830 
    831    /* compute interpolation vector */
    832    if (minCol != maxCol) {
    833       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
    834    }
    835 
    836    /* add in texels */
    837    for (k = N_TEXELS - 1; k >= 0; k--) {
    838       GLint t = k * 3;
    839       GLuint *kk = (GLuint *)((char *)cc + t / 8);
    840       GLint texel = n_vect + 1; /* transparent black */
    841 
    842       if (!ISTBLACK(input[k])) {
    843          if (minCol != maxCol) {
    844             /* interpolate color */
    845             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
    846             /* add in texel */
    847             kk[0] |= texel << (t & 7);
    848          }
    849       } else {
    850          /* add in texel */
    851          kk[0] |= texel << (t & 7);
    852       }
    853    }
    854 }
    855 
    856 
    857 static void
    858 fxt1_quantize_MIXED1 (GLuint *cc,
    859                       GLubyte input[N_TEXELS][MAX_COMP])
    860 {
    861    const GLint n_vect = 2; /* highest vector number in each microtile */
    862    const GLint n_comp = 3; /* 3 components: R, G, B */
    863    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
    864    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
    865    GLint i, j, k;
    866    Fx64 hi; /* high quadword */
    867    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
    868 
    869    GLint minSum;
    870    GLint maxSum;
    871    GLint minColL = 0, maxColL = -1;
    872    GLint minColR = 0, maxColR = -1;
    873 
    874    /* Our solution here is to find the darkest and brightest colors in
    875     * the 4x4 tile and use those as the two representative colors.
    876     * There are probably better algorithms to use (histogram-based).
    877     */
    878    minSum = 2000; /* big enough */
    879    maxSum = -1; /* small enough */
    880    for (k = 0; k < N_TEXELS / 2; k++) {
    881       if (!ISTBLACK(input[k])) {
    882          GLint sum = 0;
    883          for (i = 0; i < n_comp; i++) {
    884             sum += input[k][i];
    885          }
    886          if (minSum > sum) {
    887             minSum = sum;
    888             minColL = k;
    889          }
    890          if (maxSum < sum) {
    891             maxSum = sum;
    892             maxColL = k;
    893          }
    894       }
    895    }
    896    minSum = 2000; /* big enough */
    897    maxSum = -1; /* small enough */
    898    for (; k < N_TEXELS; k++) {
    899       if (!ISTBLACK(input[k])) {
    900          GLint sum = 0;
    901          for (i = 0; i < n_comp; i++) {
    902             sum += input[k][i];
    903          }
    904          if (minSum > sum) {
    905             minSum = sum;
    906             minColR = k;
    907          }
    908          if (maxSum < sum) {
    909             maxSum = sum;
    910             maxColR = k;
    911          }
    912       }
    913    }
    914 
    915    /* left microtile */
    916    if (maxColL == -1) {
    917       /* all transparent black */
    918       cc[0] = ~0u;
    919       for (i = 0; i < n_comp; i++) {
    920          vec[0][i] = 0;
    921          vec[1][i] = 0;
    922       }
    923    } else {
    924       cc[0] = 0;
    925       for (i = 0; i < n_comp; i++) {
    926          vec[0][i] = input[minColL][i];
    927          vec[1][i] = input[maxColL][i];
    928       }
    929       if (minColL != maxColL) {
    930          /* compute interpolation vector */
    931          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
    932 
    933          /* add in texels */
    934          lolo = 0;
    935          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
    936             GLint texel = n_vect + 1; /* transparent black */
    937             if (!ISTBLACK(input[k])) {
    938                /* interpolate color */
    939                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
    940             }
    941             /* add in texel */
    942             lolo <<= 2;
    943             lolo |= texel;
    944          }
    945          cc[0] = lolo;
    946       }
    947    }
    948 
    949    /* right microtile */
    950    if (maxColR == -1) {
    951       /* all transparent black */
    952       cc[1] = ~0u;
    953       for (i = 0; i < n_comp; i++) {
    954          vec[2][i] = 0;
    955          vec[3][i] = 0;
    956       }
    957    } else {
    958       cc[1] = 0;
    959       for (i = 0; i < n_comp; i++) {
    960          vec[2][i] = input[minColR][i];
    961          vec[3][i] = input[maxColR][i];
    962       }
    963       if (minColR != maxColR) {
    964          /* compute interpolation vector */
    965          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
    966 
    967          /* add in texels */
    968          lohi = 0;
    969          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
    970             GLint texel = n_vect + 1; /* transparent black */
    971             if (!ISTBLACK(input[k])) {
    972                /* interpolate color */
    973                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
    974             }
    975             /* add in texel */
    976             lohi <<= 2;
    977             lohi |= texel;
    978          }
    979          cc[1] = lohi;
    980       }
    981    }
    982 
    983    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
    984    for (j = 2 * 2 - 1; j >= 0; j--) {
    985       for (i = 0; i < n_comp; i++) {
    986          /* add in colors */
    987          FX64_SHL(hi, 5);
    988          FX64_OR32(hi, vec[j][i] >> 3);
    989       }
    990    }
    991    ((Fx64 *)cc)[1] = hi;
    992 }
    993 
    994 
    995 static void
    996 fxt1_quantize_MIXED0 (GLuint *cc,
    997                       GLubyte input[N_TEXELS][MAX_COMP])
    998 {
    999    const GLint n_vect = 3; /* highest vector number in each microtile */
   1000    const GLint n_comp = 3; /* 3 components: R, G, B */
   1001    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   1002    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   1003    GLint i, j, k;
   1004    Fx64 hi; /* high quadword */
   1005    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
   1006 
   1007    GLint minColL = 0, maxColL = 0;
   1008    GLint minColR = 0, maxColR = 0;
   1009 #if 0
   1010    GLint minSum;
   1011    GLint maxSum;
   1012 
   1013    /* Our solution here is to find the darkest and brightest colors in
   1014     * the 4x4 tile and use those as the two representative colors.
   1015     * There are probably better algorithms to use (histogram-based).
   1016     */
   1017    minSum = 2000; /* big enough */
   1018    maxSum = -1; /* small enough */
   1019    for (k = 0; k < N_TEXELS / 2; k++) {
   1020       GLint sum = 0;
   1021       for (i = 0; i < n_comp; i++) {
   1022          sum += input[k][i];
   1023       }
   1024       if (minSum > sum) {
   1025          minSum = sum;
   1026          minColL = k;
   1027       }
   1028       if (maxSum < sum) {
   1029          maxSum = sum;
   1030          maxColL = k;
   1031       }
   1032    }
   1033    minSum = 2000; /* big enough */
   1034    maxSum = -1; /* small enough */
   1035    for (; k < N_TEXELS; k++) {
   1036       GLint sum = 0;
   1037       for (i = 0; i < n_comp; i++) {
   1038          sum += input[k][i];
   1039       }
   1040       if (minSum > sum) {
   1041          minSum = sum;
   1042          minColR = k;
   1043       }
   1044       if (maxSum < sum) {
   1045          maxSum = sum;
   1046          maxColR = k;
   1047       }
   1048    }
   1049 #else
   1050    GLint minVal;
   1051    GLint maxVal;
   1052    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
   1053    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
   1054 
   1055    /* Scan the channel with max variance for lo & hi
   1056     * and use those as the two representative colors.
   1057     */
   1058    minVal = 2000; /* big enough */
   1059    maxVal = -1; /* small enough */
   1060    for (k = 0; k < N_TEXELS / 2; k++) {
   1061       GLint t = input[k][maxVarL];
   1062       if (minVal > t) {
   1063          minVal = t;
   1064          minColL = k;
   1065       }
   1066       if (maxVal < t) {
   1067          maxVal = t;
   1068          maxColL = k;
   1069       }
   1070    }
   1071    minVal = 2000; /* big enough */
   1072    maxVal = -1; /* small enough */
   1073    for (; k < N_TEXELS; k++) {
   1074       GLint t = input[k][maxVarR];
   1075       if (minVal > t) {
   1076          minVal = t;
   1077          minColR = k;
   1078       }
   1079       if (maxVal < t) {
   1080          maxVal = t;
   1081          maxColR = k;
   1082       }
   1083    }
   1084 #endif
   1085 
   1086    /* left microtile */
   1087    cc[0] = 0;
   1088    for (i = 0; i < n_comp; i++) {
   1089       vec[0][i] = input[minColL][i];
   1090       vec[1][i] = input[maxColL][i];
   1091    }
   1092    if (minColL != maxColL) {
   1093       /* compute interpolation vector */
   1094       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
   1095 
   1096       /* add in texels */
   1097       lolo = 0;
   1098       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
   1099          GLint texel;
   1100          /* interpolate color */
   1101          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
   1102          /* add in texel */
   1103          lolo <<= 2;
   1104          lolo |= texel;
   1105       }
   1106 
   1107       /* funky encoding for LSB of green */
   1108       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
   1109          for (i = 0; i < n_comp; i++) {
   1110             vec[1][i] = input[minColL][i];
   1111             vec[0][i] = input[maxColL][i];
   1112          }
   1113          lolo = ~lolo;
   1114       }
   1115 
   1116       cc[0] = lolo;
   1117    }
   1118 
   1119    /* right microtile */
   1120    cc[1] = 0;
   1121    for (i = 0; i < n_comp; i++) {
   1122       vec[2][i] = input[minColR][i];
   1123       vec[3][i] = input[maxColR][i];
   1124    }
   1125    if (minColR != maxColR) {
   1126       /* compute interpolation vector */
   1127       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
   1128 
   1129       /* add in texels */
   1130       lohi = 0;
   1131       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
   1132          GLint texel;
   1133          /* interpolate color */
   1134          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
   1135          /* add in texel */
   1136          lohi <<= 2;
   1137          lohi |= texel;
   1138       }
   1139 
   1140       /* funky encoding for LSB of green */
   1141       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
   1142          for (i = 0; i < n_comp; i++) {
   1143             vec[3][i] = input[minColR][i];
   1144             vec[2][i] = input[maxColR][i];
   1145          }
   1146          lohi = ~lohi;
   1147       }
   1148 
   1149       cc[1] = lohi;
   1150    }
   1151 
   1152    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
   1153    for (j = 2 * 2 - 1; j >= 0; j--) {
   1154       for (i = 0; i < n_comp; i++) {
   1155          /* add in colors */
   1156          FX64_SHL(hi, 5);
   1157          FX64_OR32(hi, vec[j][i] >> 3);
   1158       }
   1159    }
   1160    ((Fx64 *)cc)[1] = hi;
   1161 }
   1162 
   1163 
   1164 static void
   1165 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
   1166 {
   1167    GLint trualpha;
   1168    GLubyte reord[N_TEXELS][MAX_COMP];
   1169 
   1170    GLubyte input[N_TEXELS][MAX_COMP];
   1171    GLint i, k, l;
   1172 
   1173    if (comps == 3) {
   1174       /* make the whole block opaque */
   1175       memset(input, -1, sizeof(input));
   1176    }
   1177 
   1178    /* 8 texels each line */
   1179    for (l = 0; l < 4; l++) {
   1180       for (k = 0; k < 4; k++) {
   1181          for (i = 0; i < comps; i++) {
   1182             input[k + l * 4][i] = *lines[l]++;
   1183          }
   1184       }
   1185       for (; k < 8; k++) {
   1186          for (i = 0; i < comps; i++) {
   1187             input[k + l * 4 + 12][i] = *lines[l]++;
   1188          }
   1189       }
   1190    }
   1191 
   1192    /* block layout:
   1193     * 00, 01, 02, 03, 08, 09, 0a, 0b
   1194     * 10, 11, 12, 13, 18, 19, 1a, 1b
   1195     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
   1196     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
   1197     */
   1198 
   1199    /* [dBorca]
   1200     * stupidity flows forth from this
   1201     */
   1202    l = N_TEXELS;
   1203    trualpha = 0;
   1204    if (comps == 4) {
   1205       /* skip all transparent black texels */
   1206       l = 0;
   1207       for (k = 0; k < N_TEXELS; k++) {
   1208          /* test all components against 0 */
   1209          if (!ISTBLACK(input[k])) {
   1210             /* texel is not transparent black */
   1211             COPY_4UBV(reord[l], input[k]);
   1212             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
   1213                /* non-opaque texel */
   1214                trualpha = !0;
   1215             }
   1216             l++;
   1217          }
   1218       }
   1219    }
   1220 
   1221 #if 0
   1222    if (trualpha) {
   1223       fxt1_quantize_ALPHA0(cc, input, reord, l);
   1224    } else if (l == 0) {
   1225       cc[0] = cc[1] = cc[2] = -1;
   1226       cc[3] = 0;
   1227    } else if (l < N_TEXELS) {
   1228       fxt1_quantize_HI(cc, input, reord, l);
   1229    } else {
   1230       fxt1_quantize_CHROMA(cc, input);
   1231    }
   1232    (void)fxt1_quantize_ALPHA1;
   1233    (void)fxt1_quantize_MIXED1;
   1234    (void)fxt1_quantize_MIXED0;
   1235 #else
   1236    if (trualpha) {
   1237       fxt1_quantize_ALPHA1(cc, input);
   1238    } else if (l == 0) {
   1239       cc[0] = cc[1] = cc[2] = ~0u;
   1240       cc[3] = 0;
   1241    } else if (l < N_TEXELS) {
   1242       fxt1_quantize_MIXED1(cc, input);
   1243    } else {
   1244       fxt1_quantize_MIXED0(cc, input);
   1245    }
   1246    (void)fxt1_quantize_ALPHA0;
   1247    (void)fxt1_quantize_HI;
   1248    (void)fxt1_quantize_CHROMA;
   1249 #endif
   1250 }
   1251 
   1252 
   1253 
   1254 /**
   1255  * Upscale an image by replication, not (typical) stretching.
   1256  * We use this when the image width or height is less than a
   1257  * certain size (4, 8) and we need to upscale an image.
   1258  */
   1259 static void
   1260 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
   1261                    GLsizei outWidth, GLsizei outHeight,
   1262                    GLint comps, const GLubyte *src, GLint srcRowStride,
   1263                    GLubyte *dest )
   1264 {
   1265    GLint i, j, k;
   1266 
   1267    assert(outWidth >= inWidth);
   1268    assert(outHeight >= inHeight);
   1269 #if 0
   1270    assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
   1271    assert((outWidth & 3) == 0);
   1272    assert((outHeight & 3) == 0);
   1273 #endif
   1274 
   1275    for (i = 0; i < outHeight; i++) {
   1276       const GLint ii = i % inHeight;
   1277       for (j = 0; j < outWidth; j++) {
   1278          const GLint jj = j % inWidth;
   1279          for (k = 0; k < comps; k++) {
   1280             dest[(i * outWidth + j) * comps + k]
   1281                = src[ii * srcRowStride + jj * comps + k];
   1282          }
   1283       }
   1284    }
   1285 }
   1286 
   1287 
   1288 static void
   1289 fxt1_encode (GLuint width, GLuint height, GLint comps,
   1290              const void *source, GLint srcRowStride,
   1291              void *dest, GLint destRowStride)
   1292 {
   1293    GLuint x, y;
   1294    const GLubyte *data;
   1295    GLuint *encoded = (GLuint *)dest;
   1296    void *newSource = NULL;
   1297 
   1298    assert(comps == 3 || comps == 4);
   1299 
   1300    /* Replicate image if width is not M8 or height is not M4 */
   1301    if ((width & 7) | (height & 3)) {
   1302       GLint newWidth = (width + 7) & ~7;
   1303       GLint newHeight = (height + 3) & ~3;
   1304       newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
   1305       if (!newSource) {
   1306          GET_CURRENT_CONTEXT(ctx);
   1307          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
   1308          goto cleanUp;
   1309       }
   1310       upscale_teximage2d(width, height, newWidth, newHeight,
   1311                          comps, (const GLubyte *) source,
   1312                          srcRowStride, (GLubyte *) newSource);
   1313       source = newSource;
   1314       width = newWidth;
   1315       height = newHeight;
   1316       srcRowStride = comps * newWidth;
   1317    }
   1318 
   1319    data = (const GLubyte *) source;
   1320    destRowStride = (destRowStride - width * 2) / 4;
   1321    for (y = 0; y < height; y += 4) {
   1322       GLuint offs = 0 + (y + 0) * srcRowStride;
   1323       for (x = 0; x < width; x += 8) {
   1324          const GLubyte *lines[4];
   1325          lines[0] = &data[offs];
   1326          lines[1] = lines[0] + srcRowStride;
   1327          lines[2] = lines[1] + srcRowStride;
   1328          lines[3] = lines[2] + srcRowStride;
   1329          offs += 8 * comps;
   1330          fxt1_quantize(encoded, lines, comps);
   1331          /* 128 bits per 8x4 block */
   1332          encoded += 4;
   1333       }
   1334       encoded += destRowStride;
   1335    }
   1336 
   1337  cleanUp:
   1338    free(newSource);
   1339 }
   1340 
   1341 
   1342 /***************************************************************************\
   1343  * FXT1 decoder
   1344  *
   1345  * The decoder is based on GL_3DFX_texture_compression_FXT1
   1346  * specification and serves as a concept for the encoder.
   1347 \***************************************************************************/
   1348 
   1349 
   1350 /* lookup table for scaling 5 bit colors up to 8 bits */
   1351 static const GLubyte _rgb_scale_5[] = {
   1352    0,   8,   16,  25,  33,  41,  49,  58,
   1353    66,  74,  82,  90,  99,  107, 115, 123,
   1354    132, 140, 148, 156, 165, 173, 181, 189,
   1355    197, 206, 214, 222, 230, 239, 247, 255
   1356 };
   1357 
   1358 /* lookup table for scaling 6 bit colors up to 8 bits */
   1359 static const GLubyte _rgb_scale_6[] = {
   1360    0,   4,   8,   12,  16,  20,  24,  28,
   1361    32,  36,  40,  45,  49,  53,  57,  61,
   1362    65,  69,  73,  77,  81,  85,  89,  93,
   1363    97,  101, 105, 109, 113, 117, 121, 125,
   1364    130, 134, 138, 142, 146, 150, 154, 158,
   1365    162, 166, 170, 174, 178, 182, 186, 190,
   1366    194, 198, 202, 206, 210, 215, 219, 223,
   1367    227, 231, 235, 239, 243, 247, 251, 255
   1368 };
   1369 
   1370 
   1371 #define CC_SEL(cc, which) (((GLuint *)(cc))[(which) / 32] >> ((which) & 31))
   1372 #define UP5(c) _rgb_scale_5[(c) & 31]
   1373 #define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
   1374 #define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n)
   1375 
   1376 
   1377 static void
   1378 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
   1379 {
   1380    const GLuint *cc;
   1381 
   1382    t *= 3;
   1383    cc = (const GLuint *)(code + t / 8);
   1384    t = (cc[0] >> (t & 7)) & 7;
   1385 
   1386    if (t == 7) {
   1387       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
   1388    } else {
   1389       GLubyte r, g, b;
   1390       cc = (const GLuint *)(code + 12);
   1391       if (t == 0) {
   1392          b = UP5(CC_SEL(cc, 0));
   1393          g = UP5(CC_SEL(cc, 5));
   1394          r = UP5(CC_SEL(cc, 10));
   1395       } else if (t == 6) {
   1396          b = UP5(CC_SEL(cc, 15));
   1397          g = UP5(CC_SEL(cc, 20));
   1398          r = UP5(CC_SEL(cc, 25));
   1399       } else {
   1400          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
   1401          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
   1402          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
   1403       }
   1404       rgba[RCOMP] = r;
   1405       rgba[GCOMP] = g;
   1406       rgba[BCOMP] = b;
   1407       rgba[ACOMP] = 255;
   1408    }
   1409 }
   1410 
   1411 
   1412 static void
   1413 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
   1414 {
   1415    const GLuint *cc;
   1416    GLuint kk;
   1417 
   1418    cc = (const GLuint *)code;
   1419    if (t & 16) {
   1420       cc++;
   1421       t &= 15;
   1422    }
   1423    t = (cc[0] >> (t * 2)) & 3;
   1424 
   1425    t *= 15;
   1426    cc = (const GLuint *)(code + 8 + t / 8);
   1427    kk = cc[0] >> (t & 7);
   1428    rgba[BCOMP] = UP5(kk);
   1429    rgba[GCOMP] = UP5(kk >> 5);
   1430    rgba[RCOMP] = UP5(kk >> 10);
   1431    rgba[ACOMP] = 255;
   1432 }
   1433 
   1434 
   1435 static void
   1436 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
   1437 {
   1438    const GLuint *cc;
   1439    GLuint col[2][3];
   1440    GLint glsb, selb;
   1441 
   1442    cc = (const GLuint *)code;
   1443    if (t & 16) {
   1444       t &= 15;
   1445       t = (cc[1] >> (t * 2)) & 3;
   1446       /* col 2 */
   1447       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
   1448       col[0][GCOMP] = CC_SEL(cc, 99);
   1449       col[0][RCOMP] = CC_SEL(cc, 104);
   1450       /* col 3 */
   1451       col[1][BCOMP] = CC_SEL(cc, 109);
   1452       col[1][GCOMP] = CC_SEL(cc, 114);
   1453       col[1][RCOMP] = CC_SEL(cc, 119);
   1454       glsb = CC_SEL(cc, 126);
   1455       selb = CC_SEL(cc, 33);
   1456    } else {
   1457       t = (cc[0] >> (t * 2)) & 3;
   1458       /* col 0 */
   1459       col[0][BCOMP] = CC_SEL(cc, 64);
   1460       col[0][GCOMP] = CC_SEL(cc, 69);
   1461       col[0][RCOMP] = CC_SEL(cc, 74);
   1462       /* col 1 */
   1463       col[1][BCOMP] = CC_SEL(cc, 79);
   1464       col[1][GCOMP] = CC_SEL(cc, 84);
   1465       col[1][RCOMP] = CC_SEL(cc, 89);
   1466       glsb = CC_SEL(cc, 125);
   1467       selb = CC_SEL(cc, 1);
   1468    }
   1469 
   1470    if (CC_SEL(cc, 124) & 1) {
   1471       /* alpha[0] == 1 */
   1472 
   1473       if (t == 3) {
   1474          /* zero */
   1475          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
   1476       } else {
   1477          GLubyte r, g, b;
   1478          if (t == 0) {
   1479             b = UP5(col[0][BCOMP]);
   1480             g = UP5(col[0][GCOMP]);
   1481             r = UP5(col[0][RCOMP]);
   1482          } else if (t == 2) {
   1483             b = UP5(col[1][BCOMP]);
   1484             g = UP6(col[1][GCOMP], glsb);
   1485             r = UP5(col[1][RCOMP]);
   1486          } else {
   1487             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
   1488             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
   1489             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
   1490          }
   1491          rgba[RCOMP] = r;
   1492          rgba[GCOMP] = g;
   1493          rgba[BCOMP] = b;
   1494          rgba[ACOMP] = 255;
   1495       }
   1496    } else {
   1497       /* alpha[0] == 0 */
   1498       GLubyte r, g, b;
   1499       if (t == 0) {
   1500          b = UP5(col[0][BCOMP]);
   1501          g = UP6(col[0][GCOMP], glsb ^ selb);
   1502          r = UP5(col[0][RCOMP]);
   1503       } else if (t == 3) {
   1504          b = UP5(col[1][BCOMP]);
   1505          g = UP6(col[1][GCOMP], glsb);
   1506          r = UP5(col[1][RCOMP]);
   1507       } else {
   1508          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
   1509          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
   1510                         UP6(col[1][GCOMP], glsb));
   1511          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
   1512       }
   1513       rgba[RCOMP] = r;
   1514       rgba[GCOMP] = g;
   1515       rgba[BCOMP] = b;
   1516       rgba[ACOMP] = 255;
   1517    }
   1518 }
   1519 
   1520 
   1521 static void
   1522 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
   1523 {
   1524    const GLuint *cc;
   1525    GLubyte r, g, b, a;
   1526 
   1527    cc = (const GLuint *)code;
   1528    if (CC_SEL(cc, 124) & 1) {
   1529       /* lerp == 1 */
   1530       GLuint col0[4];
   1531 
   1532       if (t & 16) {
   1533          t &= 15;
   1534          t = (cc[1] >> (t * 2)) & 3;
   1535          /* col 2 */
   1536          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
   1537          col0[GCOMP] = CC_SEL(cc, 99);
   1538          col0[RCOMP] = CC_SEL(cc, 104);
   1539          col0[ACOMP] = CC_SEL(cc, 119);
   1540       } else {
   1541          t = (cc[0] >> (t * 2)) & 3;
   1542          /* col 0 */
   1543          col0[BCOMP] = CC_SEL(cc, 64);
   1544          col0[GCOMP] = CC_SEL(cc, 69);
   1545          col0[RCOMP] = CC_SEL(cc, 74);
   1546          col0[ACOMP] = CC_SEL(cc, 109);
   1547       }
   1548 
   1549       if (t == 0) {
   1550          b = UP5(col0[BCOMP]);
   1551          g = UP5(col0[GCOMP]);
   1552          r = UP5(col0[RCOMP]);
   1553          a = UP5(col0[ACOMP]);
   1554       } else if (t == 3) {
   1555          b = UP5(CC_SEL(cc, 79));
   1556          g = UP5(CC_SEL(cc, 84));
   1557          r = UP5(CC_SEL(cc, 89));
   1558          a = UP5(CC_SEL(cc, 114));
   1559       } else {
   1560          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
   1561          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
   1562          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
   1563          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
   1564       }
   1565    } else {
   1566       /* lerp == 0 */
   1567 
   1568       if (t & 16) {
   1569          cc++;
   1570          t &= 15;
   1571       }
   1572       t = (cc[0] >> (t * 2)) & 3;
   1573 
   1574       if (t == 3) {
   1575          /* zero */
   1576          r = g = b = a = 0;
   1577       } else {
   1578          GLuint kk;
   1579          cc = (const GLuint *)code;
   1580          a = UP5(cc[3] >> (t * 5 + 13));
   1581          t *= 15;
   1582          cc = (const GLuint *)(code + 8 + t / 8);
   1583          kk = cc[0] >> (t & 7);
   1584          b = UP5(kk);
   1585          g = UP5(kk >> 5);
   1586          r = UP5(kk >> 10);
   1587       }
   1588    }
   1589    rgba[RCOMP] = r;
   1590    rgba[GCOMP] = g;
   1591    rgba[BCOMP] = b;
   1592    rgba[ACOMP] = a;
   1593 }
   1594 
   1595 
   1596 static void
   1597 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
   1598                GLint i, GLint j, GLubyte *rgba)
   1599 {
   1600    static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
   1601       fxt1_decode_1HI,     /* cc-high   = "00?" */
   1602       fxt1_decode_1HI,     /* cc-high   = "00?" */
   1603       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
   1604       fxt1_decode_1ALPHA,  /* alpha     = "011" */
   1605       fxt1_decode_1MIXED,  /* mixed     = "1??" */
   1606       fxt1_decode_1MIXED,  /* mixed     = "1??" */
   1607       fxt1_decode_1MIXED,  /* mixed     = "1??" */
   1608       fxt1_decode_1MIXED   /* mixed     = "1??" */
   1609    };
   1610 
   1611    const GLubyte *code = (const GLubyte *)texture +
   1612                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
   1613    GLint mode = CC_SEL(code, 125);
   1614    GLint t = i & 7;
   1615 
   1616    if (t & 4) {
   1617       t += 12;
   1618    }
   1619    t += (j & 3) * 4;
   1620 
   1621    decode_1[mode](code, t, rgba);
   1622 }
   1623 
   1624 
   1625 
   1626 
   1627 static void
   1628 fetch_rgb_fxt1(const GLubyte *map,
   1629                GLint rowStride, GLint i, GLint j, GLfloat *texel)
   1630 {
   1631    GLubyte rgba[4];
   1632    fxt1_decode_1(map, rowStride, i, j, rgba);
   1633    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
   1634    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
   1635    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
   1636    texel[ACOMP] = 1.0F;
   1637 }
   1638 
   1639 
   1640 static void
   1641 fetch_rgba_fxt1(const GLubyte *map,
   1642                 GLint rowStride, GLint i, GLint j, GLfloat *texel)
   1643 {
   1644    GLubyte rgba[4];
   1645    fxt1_decode_1(map, rowStride, i, j, rgba);
   1646    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
   1647    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
   1648    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
   1649    texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
   1650 }
   1651 
   1652 
   1653 compressed_fetch_func
   1654 _mesa_get_fxt_fetch_func(mesa_format format)
   1655 {
   1656    switch (format) {
   1657    case MESA_FORMAT_RGB_FXT1:
   1658       return fetch_rgb_fxt1;
   1659    case MESA_FORMAT_RGBA_FXT1:
   1660       return fetch_rgba_fxt1;
   1661    default:
   1662       return NULL;
   1663    }
   1664 }
   1665