1 /* 2 * Mesa 3-D graphics library 3 * 4 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included 14 * in all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25 26 /** 27 * \file texcompress_fxt1.c 28 * GL_3DFX_texture_compression_FXT1 support. 29 */ 30 31 32 #include "glheader.h" 33 #include "imports.h" 34 #include "image.h" 35 #include "macros.h" 36 #include "mipmap.h" 37 #include "texcompress.h" 38 #include "texcompress_fxt1.h" 39 #include "texstore.h" 40 41 42 static void 43 fxt1_encode (GLuint width, GLuint height, GLint comps, 44 const void *source, GLint srcRowStride, 45 void *dest, GLint destRowStride); 46 47 static void 48 fxt1_decode_1 (const void *texture, GLint stride, 49 GLint i, GLint j, GLubyte *rgba); 50 51 52 /** 53 * Store user's image in rgb_fxt1 format. 54 */ 55 GLboolean 56 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS) 57 { 58 const GLubyte *pixels; 59 GLint srcRowStride; 60 GLubyte *dst; 61 const GLubyte *tempImage = NULL; 62 63 assert(dstFormat == MESA_FORMAT_RGB_FXT1); 64 65 if (srcFormat != GL_RGB || 66 srcType != GL_UNSIGNED_BYTE || 67 ctx->_ImageTransferState || 68 ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth || 69 srcPacking->SwapBytes) { 70 /* convert image to RGB/GLubyte */ 71 GLubyte *tempImageSlices[1]; 72 int rgbRowStride = 3 * srcWidth * sizeof(GLubyte); 73 tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte)); 74 if (!tempImage) 75 return GL_FALSE; /* out of memory */ 76 tempImageSlices[0] = (GLubyte *) tempImage; 77 _mesa_texstore(ctx, dims, 78 baseInternalFormat, 79 MESA_FORMAT_RGB_UNORM8, 80 rgbRowStride, tempImageSlices, 81 srcWidth, srcHeight, srcDepth, 82 srcFormat, srcType, srcAddr, 83 srcPacking); 84 pixels = tempImage; 85 srcRowStride = 3 * srcWidth; 86 srcFormat = GL_RGB; 87 } 88 else { 89 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight, 90 srcFormat, srcType, 0, 0); 91 92 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, 93 srcType) / sizeof(GLubyte); 94 } 95 96 dst = dstSlices[0]; 97 98 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride, 99 dst, dstRowStride); 100 101 free((void*) tempImage); 102 103 return GL_TRUE; 104 } 105 106 107 /** 108 * Store user's image in rgba_fxt1 format. 109 */ 110 GLboolean 111 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS) 112 { 113 const GLubyte *pixels; 114 GLint srcRowStride; 115 GLubyte *dst; 116 const GLubyte *tempImage = NULL; 117 118 assert(dstFormat == MESA_FORMAT_RGBA_FXT1); 119 120 if (srcFormat != GL_RGBA || 121 srcType != GL_UNSIGNED_BYTE || 122 ctx->_ImageTransferState || 123 srcPacking->SwapBytes) { 124 /* convert image to RGBA/GLubyte */ 125 GLubyte *tempImageSlices[1]; 126 int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte); 127 tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte)); 128 if (!tempImage) 129 return GL_FALSE; /* out of memory */ 130 tempImageSlices[0] = (GLubyte *) tempImage; 131 _mesa_texstore(ctx, dims, 132 baseInternalFormat, 133 _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM 134 : MESA_FORMAT_A8B8G8R8_UNORM, 135 rgbaRowStride, tempImageSlices, 136 srcWidth, srcHeight, srcDepth, 137 srcFormat, srcType, srcAddr, 138 srcPacking); 139 pixels = tempImage; 140 srcRowStride = 4 * srcWidth; 141 srcFormat = GL_RGBA; 142 } 143 else { 144 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight, 145 srcFormat, srcType, 0, 0); 146 147 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, 148 srcType) / sizeof(GLubyte); 149 } 150 151 dst = dstSlices[0]; 152 153 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride, 154 dst, dstRowStride); 155 156 free((void*) tempImage); 157 158 return GL_TRUE; 159 } 160 161 162 /***************************************************************************\ 163 * FXT1 encoder 164 * 165 * The encoder was built by reversing the decoder, 166 * and is vaguely based on Texus2 by 3dfx. Note that this code 167 * is merely a proof of concept, since it is highly UNoptimized; 168 * moreover, it is sub-optimal due to initial conditions passed 169 * to Lloyd's algorithm (the interpolation modes are even worse). 170 \***************************************************************************/ 171 172 173 #define MAX_COMP 4 /* ever needed maximum number of components in texel */ 174 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */ 175 #define N_TEXELS 32 /* number of texels in a block (always 32) */ 176 #define LL_N_REP 50 /* number of iterations in lloyd's vq */ 177 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */ 178 #define LL_RMS_E 255 /* fault tolerance (maximum error) */ 179 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */ 180 static const GLuint zero = 0; 181 #define ISTBLACK(v) (memcmp(&(v), &zero, sizeof(zero)) == 0) 182 183 /* 184 * Define a 64-bit unsigned integer type and macros 185 */ 186 #if 1 187 188 #define FX64_NATIVE 1 189 190 typedef uint64_t Fx64; 191 192 #define FX64_MOV32(a, b) a = b 193 #define FX64_OR32(a, b) a |= b 194 #define FX64_SHL(a, c) a <<= c 195 196 #else 197 198 #define FX64_NATIVE 0 199 200 typedef struct { 201 GLuint lo, hi; 202 } Fx64; 203 204 #define FX64_MOV32(a, b) a.lo = b 205 #define FX64_OR32(a, b) a.lo |= b 206 207 #define FX64_SHL(a, c) \ 208 do { \ 209 if ((c) >= 32) { \ 210 a.hi = a.lo << ((c) - 32); \ 211 a.lo = 0; \ 212 } else { \ 213 a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \ 214 a.lo <<= (c); \ 215 } \ 216 } while (0) 217 218 #endif 219 220 221 #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */ 222 #define SAFECDOT 1 /* for paranoids */ 223 224 #define MAKEIVEC(NV, NC, IV, B, V0, V1) \ 225 do { \ 226 /* compute interpolation vector */ \ 227 GLfloat d2 = 0.0F; \ 228 GLfloat rd2; \ 229 \ 230 for (i = 0; i < NC; i++) { \ 231 IV[i] = (V1[i] - V0[i]) * F(i); \ 232 d2 += IV[i] * IV[i]; \ 233 } \ 234 rd2 = (GLfloat)NV / d2; \ 235 B = 0; \ 236 for (i = 0; i < NC; i++) { \ 237 IV[i] *= F(i); \ 238 B -= IV[i] * V0[i]; \ 239 IV[i] *= rd2; \ 240 } \ 241 B = B * rd2 + 0.5f; \ 242 } while (0) 243 244 #define CALCCDOT(TEXEL, NV, NC, IV, B, V)\ 245 do { \ 246 GLfloat dot = 0.0F; \ 247 for (i = 0; i < NC; i++) { \ 248 dot += V[i] * IV[i]; \ 249 } \ 250 TEXEL = (GLint)(dot + B); \ 251 if (SAFECDOT) { \ 252 if (TEXEL < 0) { \ 253 TEXEL = 0; \ 254 } else if (TEXEL > NV) { \ 255 TEXEL = NV; \ 256 } \ 257 } \ 258 } while (0) 259 260 261 static GLint 262 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv, 263 GLubyte input[MAX_COMP], GLint nc) 264 { 265 GLint i, j, best = -1; 266 GLfloat err = 1e9; /* big enough */ 267 268 for (j = 0; j < nv; j++) { 269 GLfloat e = 0.0F; 270 for (i = 0; i < nc; i++) { 271 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]); 272 } 273 if (e < err) { 274 err = e; 275 best = j; 276 } 277 } 278 279 return best; 280 } 281 282 283 static GLint 284 fxt1_worst (GLfloat vec[MAX_COMP], 285 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n) 286 { 287 GLint i, k, worst = -1; 288 GLfloat err = -1.0F; /* small enough */ 289 290 for (k = 0; k < n; k++) { 291 GLfloat e = 0.0F; 292 for (i = 0; i < nc; i++) { 293 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]); 294 } 295 if (e > err) { 296 err = e; 297 worst = k; 298 } 299 } 300 301 return worst; 302 } 303 304 305 static GLint 306 fxt1_variance (GLdouble variance[MAX_COMP], 307 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n) 308 { 309 GLint i, k, best = 0; 310 GLint sx, sx2; 311 GLdouble var, maxvar = -1; /* small enough */ 312 GLdouble teenth = 1.0 / n; 313 314 for (i = 0; i < nc; i++) { 315 sx = sx2 = 0; 316 for (k = 0; k < n; k++) { 317 GLint t = input[k][i]; 318 sx += t; 319 sx2 += t * t; 320 } 321 var = sx2 * teenth - sx * sx * teenth * teenth; 322 if (maxvar < var) { 323 maxvar = var; 324 best = i; 325 } 326 if (variance) { 327 variance[i] = var; 328 } 329 } 330 331 return best; 332 } 333 334 335 static GLint 336 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv, 337 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n) 338 { 339 #if 0 340 /* Choose colors from a grid. 341 */ 342 GLint i, j; 343 344 for (j = 0; j < nv; j++) { 345 GLint m = j * (n - 1) / (nv - 1); 346 for (i = 0; i < nc; i++) { 347 vec[j][i] = input[m][i]; 348 } 349 } 350 #else 351 /* Our solution here is to find the darkest and brightest colors in 352 * the 8x4 tile and use those as the two representative colors. 353 * There are probably better algorithms to use (histogram-based). 354 */ 355 GLint i, j, k; 356 GLint minSum = 2000; /* big enough */ 357 GLint maxSum = -1; /* small enough */ 358 GLint minCol = 0; /* phoudoin: silent compiler! */ 359 GLint maxCol = 0; /* phoudoin: silent compiler! */ 360 361 struct { 362 GLint flag; 363 GLint key; 364 GLint freq; 365 GLint idx; 366 } hist[N_TEXELS]; 367 GLint lenh = 0; 368 369 memset(hist, 0, sizeof(hist)); 370 371 for (k = 0; k < n; k++) { 372 GLint l; 373 GLint key = 0; 374 GLint sum = 0; 375 for (i = 0; i < nc; i++) { 376 key <<= 8; 377 key |= input[k][i]; 378 sum += input[k][i]; 379 } 380 for (l = 0; l < n; l++) { 381 if (!hist[l].flag) { 382 /* alloc new slot */ 383 hist[l].flag = !0; 384 hist[l].key = key; 385 hist[l].freq = 1; 386 hist[l].idx = k; 387 lenh = l + 1; 388 break; 389 } else if (hist[l].key == key) { 390 hist[l].freq++; 391 break; 392 } 393 } 394 if (minSum > sum) { 395 minSum = sum; 396 minCol = k; 397 } 398 if (maxSum < sum) { 399 maxSum = sum; 400 maxCol = k; 401 } 402 } 403 404 if (lenh <= nv) { 405 for (j = 0; j < lenh; j++) { 406 for (i = 0; i < nc; i++) { 407 vec[j][i] = (GLfloat)input[hist[j].idx][i]; 408 } 409 } 410 for (; j < nv; j++) { 411 for (i = 0; i < nc; i++) { 412 vec[j][i] = vec[0][i]; 413 } 414 } 415 return 0; 416 } 417 418 for (j = 0; j < nv; j++) { 419 for (i = 0; i < nc; i++) { 420 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1); 421 } 422 } 423 #endif 424 425 return !0; 426 } 427 428 429 static GLint 430 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv, 431 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n) 432 { 433 /* Use the generalized lloyd's algorithm for VQ: 434 * find 4 color vectors. 435 * 436 * for each sample color 437 * sort to nearest vector. 438 * 439 * replace each vector with the centroid of its matching colors. 440 * 441 * repeat until RMS doesn't improve. 442 * 443 * if a color vector has no samples, or becomes the same as another 444 * vector, replace it with the color which is farthest from a sample. 445 * 446 * vec[][MAX_COMP] initial vectors and resulting colors 447 * nv number of resulting colors required 448 * input[N_TEXELS][MAX_COMP] input texels 449 * nc number of components in input / vec 450 * n number of input samples 451 */ 452 453 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */ 454 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */ 455 GLfloat error, lasterror = 1e9; 456 457 GLint i, j, k, rep; 458 459 /* the quantizer */ 460 for (rep = 0; rep < LL_N_REP; rep++) { 461 /* reset sums & counters */ 462 for (j = 0; j < nv; j++) { 463 for (i = 0; i < nc; i++) { 464 sum[j][i] = 0; 465 } 466 cnt[j] = 0; 467 } 468 error = 0; 469 470 /* scan whole block */ 471 for (k = 0; k < n; k++) { 472 #if 1 473 GLint best = -1; 474 GLfloat err = 1e9; /* big enough */ 475 /* determine best vector */ 476 for (j = 0; j < nv; j++) { 477 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) + 478 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) + 479 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]); 480 if (nc == 4) { 481 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]); 482 } 483 if (e < err) { 484 err = e; 485 best = j; 486 } 487 } 488 #else 489 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err); 490 #endif 491 assert(best >= 0); 492 /* add in closest color */ 493 for (i = 0; i < nc; i++) { 494 sum[best][i] += input[k][i]; 495 } 496 /* mark this vector as used */ 497 cnt[best]++; 498 /* accumulate error */ 499 error += err; 500 } 501 502 /* check RMS */ 503 if ((error < LL_RMS_E) || 504 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) { 505 return !0; /* good match */ 506 } 507 lasterror = error; 508 509 /* move each vector to the barycenter of its closest colors */ 510 for (j = 0; j < nv; j++) { 511 if (cnt[j]) { 512 GLfloat div = 1.0F / cnt[j]; 513 for (i = 0; i < nc; i++) { 514 vec[j][i] = div * sum[j][i]; 515 } 516 } else { 517 /* this vec has no samples or is identical with a previous vec */ 518 GLint worst = fxt1_worst(vec[j], input, nc, n); 519 for (i = 0; i < nc; i++) { 520 vec[j][i] = input[worst][i]; 521 } 522 } 523 } 524 } 525 526 return 0; /* could not converge fast enough */ 527 } 528 529 530 static void 531 fxt1_quantize_CHROMA (GLuint *cc, 532 GLubyte input[N_TEXELS][MAX_COMP]) 533 { 534 const GLint n_vect = 4; /* 4 base vectors to find */ 535 const GLint n_comp = 3; /* 3 components: R, G, B */ 536 GLfloat vec[MAX_VECT][MAX_COMP]; 537 GLint i, j, k; 538 Fx64 hi; /* high quadword */ 539 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */ 540 541 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) { 542 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS); 543 } 544 545 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */ 546 for (j = n_vect - 1; j >= 0; j--) { 547 for (i = 0; i < n_comp; i++) { 548 /* add in colors */ 549 FX64_SHL(hi, 5); 550 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F)); 551 } 552 } 553 ((Fx64 *)cc)[1] = hi; 554 555 lohi = lolo = 0; 556 /* right microtile */ 557 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) { 558 lohi <<= 2; 559 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp); 560 } 561 /* left microtile */ 562 for (; k >= 0; k--) { 563 lolo <<= 2; 564 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp); 565 } 566 cc[1] = lohi; 567 cc[0] = lolo; 568 } 569 570 571 static void 572 fxt1_quantize_ALPHA0 (GLuint *cc, 573 GLubyte input[N_TEXELS][MAX_COMP], 574 GLubyte reord[N_TEXELS][MAX_COMP], GLint n) 575 { 576 const GLint n_vect = 3; /* 3 base vectors to find */ 577 const GLint n_comp = 4; /* 4 components: R, G, B, A */ 578 GLfloat vec[MAX_VECT][MAX_COMP]; 579 GLint i, j, k; 580 Fx64 hi; /* high quadword */ 581 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */ 582 583 /* the last vector indicates zero */ 584 for (i = 0; i < n_comp; i++) { 585 vec[n_vect][i] = 0; 586 } 587 588 /* the first n texels in reord are guaranteed to be non-zero */ 589 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) { 590 fxt1_lloyd(vec, n_vect, reord, n_comp, n); 591 } 592 593 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */ 594 for (j = n_vect - 1; j >= 0; j--) { 595 /* add in alphas */ 596 FX64_SHL(hi, 5); 597 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F)); 598 } 599 for (j = n_vect - 1; j >= 0; j--) { 600 for (i = 0; i < n_comp - 1; i++) { 601 /* add in colors */ 602 FX64_SHL(hi, 5); 603 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F)); 604 } 605 } 606 ((Fx64 *)cc)[1] = hi; 607 608 lohi = lolo = 0; 609 /* right microtile */ 610 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) { 611 lohi <<= 2; 612 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp); 613 } 614 /* left microtile */ 615 for (; k >= 0; k--) { 616 lolo <<= 2; 617 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp); 618 } 619 cc[1] = lohi; 620 cc[0] = lolo; 621 } 622 623 624 static void 625 fxt1_quantize_ALPHA1 (GLuint *cc, 626 GLubyte input[N_TEXELS][MAX_COMP]) 627 { 628 const GLint n_vect = 3; /* highest vector number in each microtile */ 629 const GLint n_comp = 4; /* 4 components: R, G, B, A */ 630 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */ 631 GLfloat b, iv[MAX_COMP]; /* interpolation vector */ 632 GLint i, j, k; 633 Fx64 hi; /* high quadword */ 634 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */ 635 636 GLint minSum; 637 GLint maxSum; 638 GLint minColL = 0, maxColL = 0; 639 GLint minColR = 0, maxColR = 0; 640 GLint sumL = 0, sumR = 0; 641 GLint nn_comp; 642 /* Our solution here is to find the darkest and brightest colors in 643 * the 4x4 tile and use those as the two representative colors. 644 * There are probably better algorithms to use (histogram-based). 645 */ 646 nn_comp = n_comp; 647 while ((minColL == maxColL) && nn_comp) { 648 minSum = 2000; /* big enough */ 649 maxSum = -1; /* small enough */ 650 for (k = 0; k < N_TEXELS / 2; k++) { 651 GLint sum = 0; 652 for (i = 0; i < nn_comp; i++) { 653 sum += input[k][i]; 654 } 655 if (minSum > sum) { 656 minSum = sum; 657 minColL = k; 658 } 659 if (maxSum < sum) { 660 maxSum = sum; 661 maxColL = k; 662 } 663 sumL += sum; 664 } 665 666 nn_comp--; 667 } 668 669 nn_comp = n_comp; 670 while ((minColR == maxColR) && nn_comp) { 671 minSum = 2000; /* big enough */ 672 maxSum = -1; /* small enough */ 673 for (k = N_TEXELS / 2; k < N_TEXELS; k++) { 674 GLint sum = 0; 675 for (i = 0; i < nn_comp; i++) { 676 sum += input[k][i]; 677 } 678 if (minSum > sum) { 679 minSum = sum; 680 minColR = k; 681 } 682 if (maxSum < sum) { 683 maxSum = sum; 684 maxColR = k; 685 } 686 sumR += sum; 687 } 688 689 nn_comp--; 690 } 691 692 /* choose the common vector (yuck!) */ 693 { 694 GLint j1, j2; 695 GLint v1 = 0, v2 = 0; 696 GLfloat err = 1e9; /* big enough */ 697 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */ 698 for (i = 0; i < n_comp; i++) { 699 tv[0][i] = input[minColL][i]; 700 tv[1][i] = input[maxColL][i]; 701 tv[2][i] = input[minColR][i]; 702 tv[3][i] = input[maxColR][i]; 703 } 704 for (j1 = 0; j1 < 2; j1++) { 705 for (j2 = 2; j2 < 4; j2++) { 706 GLfloat e = 0.0F; 707 for (i = 0; i < n_comp; i++) { 708 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]); 709 } 710 if (e < err) { 711 err = e; 712 v1 = j1; 713 v2 = j2; 714 } 715 } 716 } 717 for (i = 0; i < n_comp; i++) { 718 vec[0][i] = tv[1 - v1][i]; 719 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR); 720 vec[2][i] = tv[5 - v2][i]; 721 } 722 } 723 724 /* left microtile */ 725 cc[0] = 0; 726 if (minColL != maxColL) { 727 /* compute interpolation vector */ 728 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]); 729 730 /* add in texels */ 731 lolo = 0; 732 for (k = N_TEXELS / 2 - 1; k >= 0; k--) { 733 GLint texel; 734 /* interpolate color */ 735 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); 736 /* add in texel */ 737 lolo <<= 2; 738 lolo |= texel; 739 } 740 741 cc[0] = lolo; 742 } 743 744 /* right microtile */ 745 cc[1] = 0; 746 if (minColR != maxColR) { 747 /* compute interpolation vector */ 748 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]); 749 750 /* add in texels */ 751 lohi = 0; 752 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { 753 GLint texel; 754 /* interpolate color */ 755 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); 756 /* add in texel */ 757 lohi <<= 2; 758 lohi |= texel; 759 } 760 761 cc[1] = lohi; 762 } 763 764 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */ 765 for (j = n_vect - 1; j >= 0; j--) { 766 /* add in alphas */ 767 FX64_SHL(hi, 5); 768 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F)); 769 } 770 for (j = n_vect - 1; j >= 0; j--) { 771 for (i = 0; i < n_comp - 1; i++) { 772 /* add in colors */ 773 FX64_SHL(hi, 5); 774 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F)); 775 } 776 } 777 ((Fx64 *)cc)[1] = hi; 778 } 779 780 781 static void 782 fxt1_quantize_HI (GLuint *cc, 783 GLubyte input[N_TEXELS][MAX_COMP], 784 GLubyte reord[N_TEXELS][MAX_COMP], GLint n) 785 { 786 const GLint n_vect = 6; /* highest vector number */ 787 const GLint n_comp = 3; /* 3 components: R, G, B */ 788 GLfloat b = 0.0F; /* phoudoin: silent compiler! */ 789 GLfloat iv[MAX_COMP]; /* interpolation vector */ 790 GLint i, k; 791 GLuint hihi; /* high quadword: hi dword */ 792 793 GLint minSum = 2000; /* big enough */ 794 GLint maxSum = -1; /* small enough */ 795 GLint minCol = 0; /* phoudoin: silent compiler! */ 796 GLint maxCol = 0; /* phoudoin: silent compiler! */ 797 798 /* Our solution here is to find the darkest and brightest colors in 799 * the 8x4 tile and use those as the two representative colors. 800 * There are probably better algorithms to use (histogram-based). 801 */ 802 for (k = 0; k < n; k++) { 803 GLint sum = 0; 804 for (i = 0; i < n_comp; i++) { 805 sum += reord[k][i]; 806 } 807 if (minSum > sum) { 808 minSum = sum; 809 minCol = k; 810 } 811 if (maxSum < sum) { 812 maxSum = sum; 813 maxCol = k; 814 } 815 } 816 817 hihi = 0; /* cc-hi = "00" */ 818 for (i = 0; i < n_comp; i++) { 819 /* add in colors */ 820 hihi <<= 5; 821 hihi |= reord[maxCol][i] >> 3; 822 } 823 for (i = 0; i < n_comp; i++) { 824 /* add in colors */ 825 hihi <<= 5; 826 hihi |= reord[minCol][i] >> 3; 827 } 828 cc[3] = hihi; 829 cc[0] = cc[1] = cc[2] = 0; 830 831 /* compute interpolation vector */ 832 if (minCol != maxCol) { 833 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]); 834 } 835 836 /* add in texels */ 837 for (k = N_TEXELS - 1; k >= 0; k--) { 838 GLint t = k * 3; 839 GLuint *kk = (GLuint *)((char *)cc + t / 8); 840 GLint texel = n_vect + 1; /* transparent black */ 841 842 if (!ISTBLACK(input[k])) { 843 if (minCol != maxCol) { 844 /* interpolate color */ 845 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); 846 /* add in texel */ 847 kk[0] |= texel << (t & 7); 848 } 849 } else { 850 /* add in texel */ 851 kk[0] |= texel << (t & 7); 852 } 853 } 854 } 855 856 857 static void 858 fxt1_quantize_MIXED1 (GLuint *cc, 859 GLubyte input[N_TEXELS][MAX_COMP]) 860 { 861 const GLint n_vect = 2; /* highest vector number in each microtile */ 862 const GLint n_comp = 3; /* 3 components: R, G, B */ 863 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */ 864 GLfloat b, iv[MAX_COMP]; /* interpolation vector */ 865 GLint i, j, k; 866 Fx64 hi; /* high quadword */ 867 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */ 868 869 GLint minSum; 870 GLint maxSum; 871 GLint minColL = 0, maxColL = -1; 872 GLint minColR = 0, maxColR = -1; 873 874 /* Our solution here is to find the darkest and brightest colors in 875 * the 4x4 tile and use those as the two representative colors. 876 * There are probably better algorithms to use (histogram-based). 877 */ 878 minSum = 2000; /* big enough */ 879 maxSum = -1; /* small enough */ 880 for (k = 0; k < N_TEXELS / 2; k++) { 881 if (!ISTBLACK(input[k])) { 882 GLint sum = 0; 883 for (i = 0; i < n_comp; i++) { 884 sum += input[k][i]; 885 } 886 if (minSum > sum) { 887 minSum = sum; 888 minColL = k; 889 } 890 if (maxSum < sum) { 891 maxSum = sum; 892 maxColL = k; 893 } 894 } 895 } 896 minSum = 2000; /* big enough */ 897 maxSum = -1; /* small enough */ 898 for (; k < N_TEXELS; k++) { 899 if (!ISTBLACK(input[k])) { 900 GLint sum = 0; 901 for (i = 0; i < n_comp; i++) { 902 sum += input[k][i]; 903 } 904 if (minSum > sum) { 905 minSum = sum; 906 minColR = k; 907 } 908 if (maxSum < sum) { 909 maxSum = sum; 910 maxColR = k; 911 } 912 } 913 } 914 915 /* left microtile */ 916 if (maxColL == -1) { 917 /* all transparent black */ 918 cc[0] = ~0u; 919 for (i = 0; i < n_comp; i++) { 920 vec[0][i] = 0; 921 vec[1][i] = 0; 922 } 923 } else { 924 cc[0] = 0; 925 for (i = 0; i < n_comp; i++) { 926 vec[0][i] = input[minColL][i]; 927 vec[1][i] = input[maxColL][i]; 928 } 929 if (minColL != maxColL) { 930 /* compute interpolation vector */ 931 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]); 932 933 /* add in texels */ 934 lolo = 0; 935 for (k = N_TEXELS / 2 - 1; k >= 0; k--) { 936 GLint texel = n_vect + 1; /* transparent black */ 937 if (!ISTBLACK(input[k])) { 938 /* interpolate color */ 939 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); 940 } 941 /* add in texel */ 942 lolo <<= 2; 943 lolo |= texel; 944 } 945 cc[0] = lolo; 946 } 947 } 948 949 /* right microtile */ 950 if (maxColR == -1) { 951 /* all transparent black */ 952 cc[1] = ~0u; 953 for (i = 0; i < n_comp; i++) { 954 vec[2][i] = 0; 955 vec[3][i] = 0; 956 } 957 } else { 958 cc[1] = 0; 959 for (i = 0; i < n_comp; i++) { 960 vec[2][i] = input[minColR][i]; 961 vec[3][i] = input[maxColR][i]; 962 } 963 if (minColR != maxColR) { 964 /* compute interpolation vector */ 965 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]); 966 967 /* add in texels */ 968 lohi = 0; 969 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { 970 GLint texel = n_vect + 1; /* transparent black */ 971 if (!ISTBLACK(input[k])) { 972 /* interpolate color */ 973 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); 974 } 975 /* add in texel */ 976 lohi <<= 2; 977 lohi |= texel; 978 } 979 cc[1] = lohi; 980 } 981 } 982 983 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */ 984 for (j = 2 * 2 - 1; j >= 0; j--) { 985 for (i = 0; i < n_comp; i++) { 986 /* add in colors */ 987 FX64_SHL(hi, 5); 988 FX64_OR32(hi, vec[j][i] >> 3); 989 } 990 } 991 ((Fx64 *)cc)[1] = hi; 992 } 993 994 995 static void 996 fxt1_quantize_MIXED0 (GLuint *cc, 997 GLubyte input[N_TEXELS][MAX_COMP]) 998 { 999 const GLint n_vect = 3; /* highest vector number in each microtile */ 1000 const GLint n_comp = 3; /* 3 components: R, G, B */ 1001 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */ 1002 GLfloat b, iv[MAX_COMP]; /* interpolation vector */ 1003 GLint i, j, k; 1004 Fx64 hi; /* high quadword */ 1005 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */ 1006 1007 GLint minColL = 0, maxColL = 0; 1008 GLint minColR = 0, maxColR = 0; 1009 #if 0 1010 GLint minSum; 1011 GLint maxSum; 1012 1013 /* Our solution here is to find the darkest and brightest colors in 1014 * the 4x4 tile and use those as the two representative colors. 1015 * There are probably better algorithms to use (histogram-based). 1016 */ 1017 minSum = 2000; /* big enough */ 1018 maxSum = -1; /* small enough */ 1019 for (k = 0; k < N_TEXELS / 2; k++) { 1020 GLint sum = 0; 1021 for (i = 0; i < n_comp; i++) { 1022 sum += input[k][i]; 1023 } 1024 if (minSum > sum) { 1025 minSum = sum; 1026 minColL = k; 1027 } 1028 if (maxSum < sum) { 1029 maxSum = sum; 1030 maxColL = k; 1031 } 1032 } 1033 minSum = 2000; /* big enough */ 1034 maxSum = -1; /* small enough */ 1035 for (; k < N_TEXELS; k++) { 1036 GLint sum = 0; 1037 for (i = 0; i < n_comp; i++) { 1038 sum += input[k][i]; 1039 } 1040 if (minSum > sum) { 1041 minSum = sum; 1042 minColR = k; 1043 } 1044 if (maxSum < sum) { 1045 maxSum = sum; 1046 maxColR = k; 1047 } 1048 } 1049 #else 1050 GLint minVal; 1051 GLint maxVal; 1052 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2); 1053 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2); 1054 1055 /* Scan the channel with max variance for lo & hi 1056 * and use those as the two representative colors. 1057 */ 1058 minVal = 2000; /* big enough */ 1059 maxVal = -1; /* small enough */ 1060 for (k = 0; k < N_TEXELS / 2; k++) { 1061 GLint t = input[k][maxVarL]; 1062 if (minVal > t) { 1063 minVal = t; 1064 minColL = k; 1065 } 1066 if (maxVal < t) { 1067 maxVal = t; 1068 maxColL = k; 1069 } 1070 } 1071 minVal = 2000; /* big enough */ 1072 maxVal = -1; /* small enough */ 1073 for (; k < N_TEXELS; k++) { 1074 GLint t = input[k][maxVarR]; 1075 if (minVal > t) { 1076 minVal = t; 1077 minColR = k; 1078 } 1079 if (maxVal < t) { 1080 maxVal = t; 1081 maxColR = k; 1082 } 1083 } 1084 #endif 1085 1086 /* left microtile */ 1087 cc[0] = 0; 1088 for (i = 0; i < n_comp; i++) { 1089 vec[0][i] = input[minColL][i]; 1090 vec[1][i] = input[maxColL][i]; 1091 } 1092 if (minColL != maxColL) { 1093 /* compute interpolation vector */ 1094 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]); 1095 1096 /* add in texels */ 1097 lolo = 0; 1098 for (k = N_TEXELS / 2 - 1; k >= 0; k--) { 1099 GLint texel; 1100 /* interpolate color */ 1101 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); 1102 /* add in texel */ 1103 lolo <<= 2; 1104 lolo |= texel; 1105 } 1106 1107 /* funky encoding for LSB of green */ 1108 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) { 1109 for (i = 0; i < n_comp; i++) { 1110 vec[1][i] = input[minColL][i]; 1111 vec[0][i] = input[maxColL][i]; 1112 } 1113 lolo = ~lolo; 1114 } 1115 1116 cc[0] = lolo; 1117 } 1118 1119 /* right microtile */ 1120 cc[1] = 0; 1121 for (i = 0; i < n_comp; i++) { 1122 vec[2][i] = input[minColR][i]; 1123 vec[3][i] = input[maxColR][i]; 1124 } 1125 if (minColR != maxColR) { 1126 /* compute interpolation vector */ 1127 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]); 1128 1129 /* add in texels */ 1130 lohi = 0; 1131 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { 1132 GLint texel; 1133 /* interpolate color */ 1134 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); 1135 /* add in texel */ 1136 lohi <<= 2; 1137 lohi |= texel; 1138 } 1139 1140 /* funky encoding for LSB of green */ 1141 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) { 1142 for (i = 0; i < n_comp; i++) { 1143 vec[3][i] = input[minColR][i]; 1144 vec[2][i] = input[maxColR][i]; 1145 } 1146 lohi = ~lohi; 1147 } 1148 1149 cc[1] = lohi; 1150 } 1151 1152 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */ 1153 for (j = 2 * 2 - 1; j >= 0; j--) { 1154 for (i = 0; i < n_comp; i++) { 1155 /* add in colors */ 1156 FX64_SHL(hi, 5); 1157 FX64_OR32(hi, vec[j][i] >> 3); 1158 } 1159 } 1160 ((Fx64 *)cc)[1] = hi; 1161 } 1162 1163 1164 static void 1165 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps) 1166 { 1167 GLint trualpha; 1168 GLubyte reord[N_TEXELS][MAX_COMP]; 1169 1170 GLubyte input[N_TEXELS][MAX_COMP]; 1171 GLint i, k, l; 1172 1173 if (comps == 3) { 1174 /* make the whole block opaque */ 1175 memset(input, -1, sizeof(input)); 1176 } 1177 1178 /* 8 texels each line */ 1179 for (l = 0; l < 4; l++) { 1180 for (k = 0; k < 4; k++) { 1181 for (i = 0; i < comps; i++) { 1182 input[k + l * 4][i] = *lines[l]++; 1183 } 1184 } 1185 for (; k < 8; k++) { 1186 for (i = 0; i < comps; i++) { 1187 input[k + l * 4 + 12][i] = *lines[l]++; 1188 } 1189 } 1190 } 1191 1192 /* block layout: 1193 * 00, 01, 02, 03, 08, 09, 0a, 0b 1194 * 10, 11, 12, 13, 18, 19, 1a, 1b 1195 * 04, 05, 06, 07, 0c, 0d, 0e, 0f 1196 * 14, 15, 16, 17, 1c, 1d, 1e, 1f 1197 */ 1198 1199 /* [dBorca] 1200 * stupidity flows forth from this 1201 */ 1202 l = N_TEXELS; 1203 trualpha = 0; 1204 if (comps == 4) { 1205 /* skip all transparent black texels */ 1206 l = 0; 1207 for (k = 0; k < N_TEXELS; k++) { 1208 /* test all components against 0 */ 1209 if (!ISTBLACK(input[k])) { 1210 /* texel is not transparent black */ 1211 COPY_4UBV(reord[l], input[k]); 1212 if (reord[l][ACOMP] < (255 - ALPHA_TS)) { 1213 /* non-opaque texel */ 1214 trualpha = !0; 1215 } 1216 l++; 1217 } 1218 } 1219 } 1220 1221 #if 0 1222 if (trualpha) { 1223 fxt1_quantize_ALPHA0(cc, input, reord, l); 1224 } else if (l == 0) { 1225 cc[0] = cc[1] = cc[2] = -1; 1226 cc[3] = 0; 1227 } else if (l < N_TEXELS) { 1228 fxt1_quantize_HI(cc, input, reord, l); 1229 } else { 1230 fxt1_quantize_CHROMA(cc, input); 1231 } 1232 (void)fxt1_quantize_ALPHA1; 1233 (void)fxt1_quantize_MIXED1; 1234 (void)fxt1_quantize_MIXED0; 1235 #else 1236 if (trualpha) { 1237 fxt1_quantize_ALPHA1(cc, input); 1238 } else if (l == 0) { 1239 cc[0] = cc[1] = cc[2] = ~0u; 1240 cc[3] = 0; 1241 } else if (l < N_TEXELS) { 1242 fxt1_quantize_MIXED1(cc, input); 1243 } else { 1244 fxt1_quantize_MIXED0(cc, input); 1245 } 1246 (void)fxt1_quantize_ALPHA0; 1247 (void)fxt1_quantize_HI; 1248 (void)fxt1_quantize_CHROMA; 1249 #endif 1250 } 1251 1252 1253 1254 /** 1255 * Upscale an image by replication, not (typical) stretching. 1256 * We use this when the image width or height is less than a 1257 * certain size (4, 8) and we need to upscale an image. 1258 */ 1259 static void 1260 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight, 1261 GLsizei outWidth, GLsizei outHeight, 1262 GLint comps, const GLubyte *src, GLint srcRowStride, 1263 GLubyte *dest ) 1264 { 1265 GLint i, j, k; 1266 1267 assert(outWidth >= inWidth); 1268 assert(outHeight >= inHeight); 1269 #if 0 1270 assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2); 1271 assert((outWidth & 3) == 0); 1272 assert((outHeight & 3) == 0); 1273 #endif 1274 1275 for (i = 0; i < outHeight; i++) { 1276 const GLint ii = i % inHeight; 1277 for (j = 0; j < outWidth; j++) { 1278 const GLint jj = j % inWidth; 1279 for (k = 0; k < comps; k++) { 1280 dest[(i * outWidth + j) * comps + k] 1281 = src[ii * srcRowStride + jj * comps + k]; 1282 } 1283 } 1284 } 1285 } 1286 1287 1288 static void 1289 fxt1_encode (GLuint width, GLuint height, GLint comps, 1290 const void *source, GLint srcRowStride, 1291 void *dest, GLint destRowStride) 1292 { 1293 GLuint x, y; 1294 const GLubyte *data; 1295 GLuint *encoded = (GLuint *)dest; 1296 void *newSource = NULL; 1297 1298 assert(comps == 3 || comps == 4); 1299 1300 /* Replicate image if width is not M8 or height is not M4 */ 1301 if ((width & 7) | (height & 3)) { 1302 GLint newWidth = (width + 7) & ~7; 1303 GLint newHeight = (height + 3) & ~3; 1304 newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte)); 1305 if (!newSource) { 1306 GET_CURRENT_CONTEXT(ctx); 1307 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression"); 1308 goto cleanUp; 1309 } 1310 upscale_teximage2d(width, height, newWidth, newHeight, 1311 comps, (const GLubyte *) source, 1312 srcRowStride, (GLubyte *) newSource); 1313 source = newSource; 1314 width = newWidth; 1315 height = newHeight; 1316 srcRowStride = comps * newWidth; 1317 } 1318 1319 data = (const GLubyte *) source; 1320 destRowStride = (destRowStride - width * 2) / 4; 1321 for (y = 0; y < height; y += 4) { 1322 GLuint offs = 0 + (y + 0) * srcRowStride; 1323 for (x = 0; x < width; x += 8) { 1324 const GLubyte *lines[4]; 1325 lines[0] = &data[offs]; 1326 lines[1] = lines[0] + srcRowStride; 1327 lines[2] = lines[1] + srcRowStride; 1328 lines[3] = lines[2] + srcRowStride; 1329 offs += 8 * comps; 1330 fxt1_quantize(encoded, lines, comps); 1331 /* 128 bits per 8x4 block */ 1332 encoded += 4; 1333 } 1334 encoded += destRowStride; 1335 } 1336 1337 cleanUp: 1338 free(newSource); 1339 } 1340 1341 1342 /***************************************************************************\ 1343 * FXT1 decoder 1344 * 1345 * The decoder is based on GL_3DFX_texture_compression_FXT1 1346 * specification and serves as a concept for the encoder. 1347 \***************************************************************************/ 1348 1349 1350 /* lookup table for scaling 5 bit colors up to 8 bits */ 1351 static const GLubyte _rgb_scale_5[] = { 1352 0, 8, 16, 25, 33, 41, 49, 58, 1353 66, 74, 82, 90, 99, 107, 115, 123, 1354 132, 140, 148, 156, 165, 173, 181, 189, 1355 197, 206, 214, 222, 230, 239, 247, 255 1356 }; 1357 1358 /* lookup table for scaling 6 bit colors up to 8 bits */ 1359 static const GLubyte _rgb_scale_6[] = { 1360 0, 4, 8, 12, 16, 20, 24, 28, 1361 32, 36, 40, 45, 49, 53, 57, 61, 1362 65, 69, 73, 77, 81, 85, 89, 93, 1363 97, 101, 105, 109, 113, 117, 121, 125, 1364 130, 134, 138, 142, 146, 150, 154, 158, 1365 162, 166, 170, 174, 178, 182, 186, 190, 1366 194, 198, 202, 206, 210, 215, 219, 223, 1367 227, 231, 235, 239, 243, 247, 251, 255 1368 }; 1369 1370 1371 #define CC_SEL(cc, which) (((GLuint *)(cc))[(which) / 32] >> ((which) & 31)) 1372 #define UP5(c) _rgb_scale_5[(c) & 31] 1373 #define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)] 1374 #define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n) 1375 1376 1377 static void 1378 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba) 1379 { 1380 const GLuint *cc; 1381 1382 t *= 3; 1383 cc = (const GLuint *)(code + t / 8); 1384 t = (cc[0] >> (t & 7)) & 7; 1385 1386 if (t == 7) { 1387 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0; 1388 } else { 1389 GLubyte r, g, b; 1390 cc = (const GLuint *)(code + 12); 1391 if (t == 0) { 1392 b = UP5(CC_SEL(cc, 0)); 1393 g = UP5(CC_SEL(cc, 5)); 1394 r = UP5(CC_SEL(cc, 10)); 1395 } else if (t == 6) { 1396 b = UP5(CC_SEL(cc, 15)); 1397 g = UP5(CC_SEL(cc, 20)); 1398 r = UP5(CC_SEL(cc, 25)); 1399 } else { 1400 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15))); 1401 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20))); 1402 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25))); 1403 } 1404 rgba[RCOMP] = r; 1405 rgba[GCOMP] = g; 1406 rgba[BCOMP] = b; 1407 rgba[ACOMP] = 255; 1408 } 1409 } 1410 1411 1412 static void 1413 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba) 1414 { 1415 const GLuint *cc; 1416 GLuint kk; 1417 1418 cc = (const GLuint *)code; 1419 if (t & 16) { 1420 cc++; 1421 t &= 15; 1422 } 1423 t = (cc[0] >> (t * 2)) & 3; 1424 1425 t *= 15; 1426 cc = (const GLuint *)(code + 8 + t / 8); 1427 kk = cc[0] >> (t & 7); 1428 rgba[BCOMP] = UP5(kk); 1429 rgba[GCOMP] = UP5(kk >> 5); 1430 rgba[RCOMP] = UP5(kk >> 10); 1431 rgba[ACOMP] = 255; 1432 } 1433 1434 1435 static void 1436 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba) 1437 { 1438 const GLuint *cc; 1439 GLuint col[2][3]; 1440 GLint glsb, selb; 1441 1442 cc = (const GLuint *)code; 1443 if (t & 16) { 1444 t &= 15; 1445 t = (cc[1] >> (t * 2)) & 3; 1446 /* col 2 */ 1447 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6; 1448 col[0][GCOMP] = CC_SEL(cc, 99); 1449 col[0][RCOMP] = CC_SEL(cc, 104); 1450 /* col 3 */ 1451 col[1][BCOMP] = CC_SEL(cc, 109); 1452 col[1][GCOMP] = CC_SEL(cc, 114); 1453 col[1][RCOMP] = CC_SEL(cc, 119); 1454 glsb = CC_SEL(cc, 126); 1455 selb = CC_SEL(cc, 33); 1456 } else { 1457 t = (cc[0] >> (t * 2)) & 3; 1458 /* col 0 */ 1459 col[0][BCOMP] = CC_SEL(cc, 64); 1460 col[0][GCOMP] = CC_SEL(cc, 69); 1461 col[0][RCOMP] = CC_SEL(cc, 74); 1462 /* col 1 */ 1463 col[1][BCOMP] = CC_SEL(cc, 79); 1464 col[1][GCOMP] = CC_SEL(cc, 84); 1465 col[1][RCOMP] = CC_SEL(cc, 89); 1466 glsb = CC_SEL(cc, 125); 1467 selb = CC_SEL(cc, 1); 1468 } 1469 1470 if (CC_SEL(cc, 124) & 1) { 1471 /* alpha[0] == 1 */ 1472 1473 if (t == 3) { 1474 /* zero */ 1475 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0; 1476 } else { 1477 GLubyte r, g, b; 1478 if (t == 0) { 1479 b = UP5(col[0][BCOMP]); 1480 g = UP5(col[0][GCOMP]); 1481 r = UP5(col[0][RCOMP]); 1482 } else if (t == 2) { 1483 b = UP5(col[1][BCOMP]); 1484 g = UP6(col[1][GCOMP], glsb); 1485 r = UP5(col[1][RCOMP]); 1486 } else { 1487 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2; 1488 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2; 1489 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2; 1490 } 1491 rgba[RCOMP] = r; 1492 rgba[GCOMP] = g; 1493 rgba[BCOMP] = b; 1494 rgba[ACOMP] = 255; 1495 } 1496 } else { 1497 /* alpha[0] == 0 */ 1498 GLubyte r, g, b; 1499 if (t == 0) { 1500 b = UP5(col[0][BCOMP]); 1501 g = UP6(col[0][GCOMP], glsb ^ selb); 1502 r = UP5(col[0][RCOMP]); 1503 } else if (t == 3) { 1504 b = UP5(col[1][BCOMP]); 1505 g = UP6(col[1][GCOMP], glsb); 1506 r = UP5(col[1][RCOMP]); 1507 } else { 1508 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP])); 1509 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb), 1510 UP6(col[1][GCOMP], glsb)); 1511 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP])); 1512 } 1513 rgba[RCOMP] = r; 1514 rgba[GCOMP] = g; 1515 rgba[BCOMP] = b; 1516 rgba[ACOMP] = 255; 1517 } 1518 } 1519 1520 1521 static void 1522 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba) 1523 { 1524 const GLuint *cc; 1525 GLubyte r, g, b, a; 1526 1527 cc = (const GLuint *)code; 1528 if (CC_SEL(cc, 124) & 1) { 1529 /* lerp == 1 */ 1530 GLuint col0[4]; 1531 1532 if (t & 16) { 1533 t &= 15; 1534 t = (cc[1] >> (t * 2)) & 3; 1535 /* col 2 */ 1536 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6; 1537 col0[GCOMP] = CC_SEL(cc, 99); 1538 col0[RCOMP] = CC_SEL(cc, 104); 1539 col0[ACOMP] = CC_SEL(cc, 119); 1540 } else { 1541 t = (cc[0] >> (t * 2)) & 3; 1542 /* col 0 */ 1543 col0[BCOMP] = CC_SEL(cc, 64); 1544 col0[GCOMP] = CC_SEL(cc, 69); 1545 col0[RCOMP] = CC_SEL(cc, 74); 1546 col0[ACOMP] = CC_SEL(cc, 109); 1547 } 1548 1549 if (t == 0) { 1550 b = UP5(col0[BCOMP]); 1551 g = UP5(col0[GCOMP]); 1552 r = UP5(col0[RCOMP]); 1553 a = UP5(col0[ACOMP]); 1554 } else if (t == 3) { 1555 b = UP5(CC_SEL(cc, 79)); 1556 g = UP5(CC_SEL(cc, 84)); 1557 r = UP5(CC_SEL(cc, 89)); 1558 a = UP5(CC_SEL(cc, 114)); 1559 } else { 1560 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79))); 1561 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84))); 1562 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89))); 1563 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114))); 1564 } 1565 } else { 1566 /* lerp == 0 */ 1567 1568 if (t & 16) { 1569 cc++; 1570 t &= 15; 1571 } 1572 t = (cc[0] >> (t * 2)) & 3; 1573 1574 if (t == 3) { 1575 /* zero */ 1576 r = g = b = a = 0; 1577 } else { 1578 GLuint kk; 1579 cc = (const GLuint *)code; 1580 a = UP5(cc[3] >> (t * 5 + 13)); 1581 t *= 15; 1582 cc = (const GLuint *)(code + 8 + t / 8); 1583 kk = cc[0] >> (t & 7); 1584 b = UP5(kk); 1585 g = UP5(kk >> 5); 1586 r = UP5(kk >> 10); 1587 } 1588 } 1589 rgba[RCOMP] = r; 1590 rgba[GCOMP] = g; 1591 rgba[BCOMP] = b; 1592 rgba[ACOMP] = a; 1593 } 1594 1595 1596 static void 1597 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */ 1598 GLint i, GLint j, GLubyte *rgba) 1599 { 1600 static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = { 1601 fxt1_decode_1HI, /* cc-high = "00?" */ 1602 fxt1_decode_1HI, /* cc-high = "00?" */ 1603 fxt1_decode_1CHROMA, /* cc-chroma = "010" */ 1604 fxt1_decode_1ALPHA, /* alpha = "011" */ 1605 fxt1_decode_1MIXED, /* mixed = "1??" */ 1606 fxt1_decode_1MIXED, /* mixed = "1??" */ 1607 fxt1_decode_1MIXED, /* mixed = "1??" */ 1608 fxt1_decode_1MIXED /* mixed = "1??" */ 1609 }; 1610 1611 const GLubyte *code = (const GLubyte *)texture + 1612 ((j / 4) * (stride / 8) + (i / 8)) * 16; 1613 GLint mode = CC_SEL(code, 125); 1614 GLint t = i & 7; 1615 1616 if (t & 4) { 1617 t += 12; 1618 } 1619 t += (j & 3) * 4; 1620 1621 decode_1[mode](code, t, rgba); 1622 } 1623 1624 1625 1626 1627 static void 1628 fetch_rgb_fxt1(const GLubyte *map, 1629 GLint rowStride, GLint i, GLint j, GLfloat *texel) 1630 { 1631 GLubyte rgba[4]; 1632 fxt1_decode_1(map, rowStride, i, j, rgba); 1633 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]); 1634 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]); 1635 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]); 1636 texel[ACOMP] = 1.0F; 1637 } 1638 1639 1640 static void 1641 fetch_rgba_fxt1(const GLubyte *map, 1642 GLint rowStride, GLint i, GLint j, GLfloat *texel) 1643 { 1644 GLubyte rgba[4]; 1645 fxt1_decode_1(map, rowStride, i, j, rgba); 1646 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]); 1647 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]); 1648 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]); 1649 texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]); 1650 } 1651 1652 1653 compressed_fetch_func 1654 _mesa_get_fxt_fetch_func(mesa_format format) 1655 { 1656 switch (format) { 1657 case MESA_FORMAT_RGB_FXT1: 1658 return fetch_rgb_fxt1; 1659 case MESA_FORMAT_RGBA_FXT1: 1660 return fetch_rgba_fxt1; 1661 default: 1662 return NULL; 1663 } 1664 } 1665