1 /* libs/opengles/dxt.cpp 2 ** 3 ** Copyright 2007, The Android Open Source Project 4 ** 5 ** Licensed under the Apache License, Version 2.0 (the "License"); 6 ** you may not use this file except in compliance with the License. 7 ** You may obtain a copy of the License at 8 ** 9 ** http://www.apache.org/licenses/LICENSE-2.0 10 ** 11 ** Unless required by applicable law or agreed to in writing, software 12 ** distributed under the License is distributed on an "AS IS" BASIS, 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 ** See the License for the specific language governing permissions and 15 ** limitations under the License. 16 */ 17 18 #define TIMING 0 19 20 #if TIMING 21 #include <sys/time.h> // for optimization timing 22 #include <stdio.h> 23 #include <stdlib.h> 24 #endif 25 26 #include <GLES/gl.h> 27 #include <utils/Endian.h> 28 29 #include "context.h" 30 31 #define TIMING 0 32 33 namespace android { 34 35 static uint8_t avg23tab[64*64]; 36 static volatile int tables_initialized = 0; 37 38 // Definitions below are equivalent to these over the valid range of arguments 39 // #define div5(x) ((x)/5) 40 // #define div7(x) ((x)/7) 41 42 // Use fixed-point to divide by 5 and 7 43 // 3277 = 2^14/5 + 1 44 // 2341 = 2^14/7 + 1 45 #define div5(x) (((x)*3277) >> 14) 46 #define div7(x) (((x)*2341) >> 14) 47 48 // Table with entry [a << 6 | b] = (2*a + b)/3 for 0 <= a,b < 64 49 #define avg23(x0,x1) avg23tab[((x0) << 6) | (x1)] 50 51 // Extract 5/6/5 RGB 52 #define red(x) (((x) >> 11) & 0x1f) 53 #define green(x) (((x) >> 5) & 0x3f) 54 #define blue(x) ( (x) & 0x1f) 55 56 /* 57 * Convert 5/6/5 RGB (as 3 ints) to 8/8/8 58 * 59 * Operation count: 8 <<, 0 &, 5 | 60 */ 61 inline static int rgb565SepTo888(int r, int g, int b) 62 63 { 64 return ((((r << 3) | (r >> 2)) << 16) | 65 (((g << 2) | (g >> 4)) << 8) | 66 ((b << 3) | (b >> 2))); 67 } 68 69 /* 70 * Convert 5/6/5 RGB (as a single 16-bit word) to 8/8/8 71 * 72 * r4r3r2r1 r0g5g4g3 g2g1g0b4 b3b2b1b0 rgb 73 * r4r3r2 r1r0g5g4 g3g2g1g0 b4b3b2b1 b0 0 0 0 rgb << 3 74 * r4r3r2r1 r0r4r3r2 g5g4g3g2 g1g0g5g4 b4b3b2b1 b0b4b3b2 desired result 75 * 76 * Construct the 24-bit RGB word as: 77 * 78 * r4r3r2r1 r0------ -------- -------- -------- -------- (rgb << 8) & 0xf80000 79 * r4r3r2 -------- -------- -------- -------- (rgb << 3) & 0x070000 80 * g5g4g3g2 g1g0---- -------- -------- (rgb << 5) & 0x00fc00 81 * g5g4 -------- -------- (rgb >> 1) & 0x000300 82 * b4b3b2b1 b0------ (rgb << 3) & 0x0000f8 83 * b4b3b2 (rgb >> 2) & 0x000007 84 * 85 * Operation count: 5 <<, 6 &, 5 | (n.b. rgb >> 3 is used twice) 86 */ 87 inline static int rgb565To888(int rgb) 88 89 { 90 int rgb3 = rgb >> 3; 91 return (((rgb << 8) & 0xf80000) | 92 ( rgb3 & 0x070000) | 93 ((rgb << 5) & 0x00fc00) | 94 ((rgb >> 1) & 0x000300) | 95 ( rgb3 & 0x0000f8) | 96 ((rgb >> 2) & 0x000007)); 97 } 98 99 #if __BYTE_ORDER == __BIG_ENDIAN 100 static uint32_t swap(uint32_t x) { 101 int b0 = (x >> 24) & 0xff; 102 int b1 = (x >> 16) & 0xff; 103 int b2 = (x >> 8) & 0xff; 104 int b3 = (x ) & 0xff; 105 106 return (uint32_t)((b3 << 24) | (b2 << 16) | (b1 << 8) | b0); 107 } 108 #endif 109 110 static void 111 init_tables() 112 { 113 if (tables_initialized) { 114 return; 115 } 116 117 for (int i = 0; i < 64; i++) { 118 for (int j = 0; j < 64; j++) { 119 int avg = (2*i + j)/3; 120 avg23tab[(i << 6) | j] = avg; 121 } 122 } 123 124 asm volatile ("" : : : "memory"); 125 tables_initialized = 1; 126 } 127 128 /* 129 * Utility to scan a DXT1 compressed texture to determine whether it 130 * contains a transparent pixel (color0 < color1, code == 3). This 131 * may be useful if the application lacks information as to whether 132 * the true format is GL_COMPRESSED_RGB_S3TC_DXT1_EXT or 133 * GL_COMPRESSED_RGBA_S3TC_DXT1_EXT. 134 */ 135 bool 136 DXT1HasAlpha(const GLvoid *data, int width, int height) { 137 #if TIMING 138 struct timeval start_t, end_t; 139 struct timezone tz; 140 141 gettimeofday(&start_t, &tz); 142 #endif 143 144 bool hasAlpha = false; 145 146 int xblocks = (width + 3)/4; 147 int yblocks = (height + 3)/4; 148 int numblocks = xblocks*yblocks; 149 150 uint32_t const *d32 = (uint32_t *)data; 151 for (int b = 0; b < numblocks; b++) { 152 uint32_t colors = *d32++; 153 154 #if __BYTE_ORDER == __BIG_ENDIAN 155 colors = swap(colors); 156 #endif 157 158 uint16_t color0 = colors & 0xffff; 159 uint16_t color1 = colors >> 16; 160 161 if (color0 < color1) { 162 // There's no need to endian-swap within 'bits' 163 // since we don't care which pixel is the transparent one 164 uint32_t bits = *d32++; 165 166 // Detect if any (odd, even) pair of bits are '11' 167 // bits: b31 b30 b29 ... b3 b2 b1 b0 168 // bits >> 1: b31 b31 b30 ... b4 b3 b2 b1 169 // &: b31 (b31 & b30) (b29 & b28) ... (b2 & b1) (b1 & b0) 170 // & 0x55..: 0 (b31 & b30) 0 ... 0 (b1 & b0) 171 if (((bits & (bits >> 1)) & 0x55555555) != 0) { 172 hasAlpha = true; 173 goto done; 174 } 175 } else { 176 // Skip 4 bytes 177 ++d32; 178 } 179 } 180 181 done: 182 #if TIMING 183 gettimeofday(&end_t, &tz); 184 long usec = (end_t.tv_sec - start_t.tv_sec)*1000000 + 185 (end_t.tv_usec - start_t.tv_usec); 186 187 printf("Scanned w=%d h=%d in %ld usec\n", width, height, usec); 188 #endif 189 190 return hasAlpha; 191 } 192 193 static void 194 decodeDXT1(const GLvoid *data, int width, int height, 195 void *surface, int stride, 196 bool hasAlpha) 197 198 { 199 init_tables(); 200 201 uint32_t const *d32 = (uint32_t *)data; 202 203 // Color table for the current block 204 uint16_t c[4]; 205 c[0] = c[1] = c[2] = c[3] = 0; 206 207 // Specified colors from the previous block 208 uint16_t prev_color0 = 0x0000; 209 uint16_t prev_color1 = 0x0000; 210 211 uint16_t* rowPtr = (uint16_t*)surface; 212 for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) { 213 uint16_t *blockPtr = rowPtr; 214 for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) { 215 uint32_t colors = *d32++; 216 uint32_t bits = *d32++; 217 218 #if __BYTE_ORDER == __BIG_ENDIAN 219 colors = swap(colors); 220 bits = swap(bits); 221 #endif 222 223 // Raw colors 224 uint16_t color0 = colors & 0xffff; 225 uint16_t color1 = colors >> 16; 226 227 // If the new block has the same base colors as the 228 // previous one, we don't need to recompute the color 229 // table c[] 230 if (color0 != prev_color0 || color1 != prev_color1) { 231 // Store raw colors for comparison with next block 232 prev_color0 = color0; 233 prev_color1 = color1; 234 235 int r0 = red(color0); 236 int g0 = green(color0); 237 int b0 = blue(color0); 238 239 int r1 = red(color1); 240 int g1 = green(color1); 241 int b1 = blue(color1); 242 243 if (hasAlpha) { 244 c[0] = (r0 << 11) | ((g0 >> 1) << 6) | (b0 << 1) | 0x1; 245 c[1] = (r1 << 11) | ((g1 >> 1) << 6) | (b1 << 1) | 0x1; 246 } else { 247 c[0] = color0; 248 c[1] = color1; 249 } 250 251 int r2, g2, b2, r3, g3, b3, a3; 252 253 int bbits = bits >> 1; 254 bool has2 = ((bbits & ~bits) & 0x55555555) != 0; 255 bool has3 = ((bbits & bits) & 0x55555555) != 0; 256 257 if (has2 || has3) { 258 if (color0 > color1) { 259 r2 = avg23(r0, r1); 260 g2 = avg23(g0, g1); 261 b2 = avg23(b0, b1); 262 263 r3 = avg23(r1, r0); 264 g3 = avg23(g1, g0); 265 b3 = avg23(b1, b0); 266 a3 = 1; 267 } else { 268 r2 = (r0 + r1) >> 1; 269 g2 = (g0 + g1) >> 1; 270 b2 = (b0 + b1) >> 1; 271 272 r3 = g3 = b3 = a3 = 0; 273 } 274 if (hasAlpha) { 275 c[2] = (r2 << 11) | ((g2 >> 1) << 6) | 276 (b2 << 1) | 0x1; 277 c[3] = (r3 << 11) | ((g3 >> 1) << 6) | 278 (b3 << 1) | a3; 279 } else { 280 c[2] = (r2 << 11) | (g2 << 5) | b2; 281 c[3] = (r3 << 11) | (g3 << 5) | b3; 282 } 283 } 284 } 285 286 uint16_t* blockRowPtr = blockPtr; 287 for (int y = 0; y < 4; y++, blockRowPtr += stride) { 288 // Don't process rows past the botom 289 if (base_y + y >= height) { 290 break; 291 } 292 293 int w = min(width - base_x, 4); 294 for (int x = 0; x < w; x++) { 295 int code = bits & 0x3; 296 bits >>= 2; 297 298 blockRowPtr[x] = c[code]; 299 } 300 } 301 } 302 } 303 } 304 305 // Output data as internalformat=GL_RGBA, type=GL_UNSIGNED_BYTE 306 static void 307 decodeDXT3(const GLvoid *data, int width, int height, 308 void *surface, int stride) 309 310 { 311 init_tables(); 312 313 uint32_t const *d32 = (uint32_t *)data; 314 315 // Specified colors from the previous block 316 uint16_t prev_color0 = 0x0000; 317 uint16_t prev_color1 = 0x0000; 318 319 // Color table for the current block 320 uint32_t c[4]; 321 c[0] = c[1] = c[2] = c[3] = 0; 322 323 uint32_t* rowPtr = (uint32_t*)surface; 324 for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) { 325 uint32_t *blockPtr = rowPtr; 326 for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) { 327 328 #if __BYTE_ORDER == __BIG_ENDIAN 329 uint32_t alphahi = *d32++; 330 uint32_t alphalo = *d32++; 331 alphahi = swap(alphahi); 332 alphalo = swap(alphalo); 333 #else 334 uint32_t alphalo = *d32++; 335 uint32_t alphahi = *d32++; 336 #endif 337 338 uint32_t colors = *d32++; 339 uint32_t bits = *d32++; 340 341 #if __BYTE_ORDER == __BIG_ENDIAN 342 colors = swap(colors); 343 bits = swap(bits); 344 #endif 345 346 uint64_t alpha = ((uint64_t)alphahi << 32) | alphalo; 347 348 // Raw colors 349 uint16_t color0 = colors & 0xffff; 350 uint16_t color1 = colors >> 16; 351 352 // If the new block has the same base colors as the 353 // previous one, we don't need to recompute the color 354 // table c[] 355 if (color0 != prev_color0 || color1 != prev_color1) { 356 // Store raw colors for comparison with next block 357 prev_color0 = color0; 358 prev_color1 = color1; 359 360 int bbits = bits >> 1; 361 bool has2 = ((bbits & ~bits) & 0x55555555) != 0; 362 bool has3 = ((bbits & bits) & 0x55555555) != 0; 363 364 if (has2 || has3) { 365 int r0 = red(color0); 366 int g0 = green(color0); 367 int b0 = blue(color0); 368 369 int r1 = red(color1); 370 int g1 = green(color1); 371 int b1 = blue(color1); 372 373 int r2 = avg23(r0, r1); 374 int g2 = avg23(g0, g1); 375 int b2 = avg23(b0, b1); 376 377 int r3 = avg23(r1, r0); 378 int g3 = avg23(g1, g0); 379 int b3 = avg23(b1, b0); 380 381 c[0] = rgb565SepTo888(r0, g0, b0); 382 c[1] = rgb565SepTo888(r1, g1, b1); 383 c[2] = rgb565SepTo888(r2, g2, b2); 384 c[3] = rgb565SepTo888(r3, g3, b3); 385 } else { 386 // Convert to 8 bits 387 c[0] = rgb565To888(color0); 388 c[1] = rgb565To888(color1); 389 } 390 } 391 392 uint32_t* blockRowPtr = blockPtr; 393 for (int y = 0; y < 4; y++, blockRowPtr += stride) { 394 // Don't process rows past the botom 395 if (base_y + y >= height) { 396 break; 397 } 398 399 int w = min(width - base_x, 4); 400 for (int x = 0; x < w; x++) { 401 int a = alpha & 0xf; 402 alpha >>= 4; 403 404 int code = bits & 0x3; 405 bits >>= 2; 406 407 blockRowPtr[x] = c[code] | (a << 28) | (a << 24); 408 } 409 } 410 } 411 } 412 } 413 414 // Output data as internalformat=GL_RGBA, type=GL_UNSIGNED_BYTE 415 static void 416 decodeDXT5(const GLvoid *data, int width, int height, 417 void *surface, int stride) 418 419 { 420 init_tables(); 421 422 uint32_t const *d32 = (uint32_t *)data; 423 424 // Specified alphas from the previous block 425 uint8_t prev_alpha0 = 0x00; 426 uint8_t prev_alpha1 = 0x00; 427 428 // Specified colors from the previous block 429 uint16_t prev_color0 = 0x0000; 430 uint16_t prev_color1 = 0x0000; 431 432 // Alpha table for the current block 433 uint8_t a[8]; 434 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = 0; 435 436 // Color table for the current block 437 uint32_t c[4]; 438 c[0] = c[1] = c[2] = c[3] = 0; 439 440 int good_a5 = 0; 441 int bad_a5 = 0; 442 int good_a6 = 0; 443 int bad_a6 = 0; 444 int good_a7 = 0; 445 int bad_a7 = 0; 446 447 uint32_t* rowPtr = (uint32_t*)surface; 448 for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) { 449 uint32_t *blockPtr = rowPtr; 450 for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) { 451 452 #if __BYTE_ORDER == __BIG_ENDIAN 453 uint32_t alphahi = *d32++; 454 uint32_t alphalo = *d32++; 455 alphahi = swap(alphahi); 456 alphalo = swap(alphalo); 457 #else 458 uint32_t alphalo = *d32++; 459 uint32_t alphahi = *d32++; 460 #endif 461 462 uint32_t colors = *d32++; 463 uint32_t bits = *d32++; 464 465 #if __BYTE_ORDER == __BIG_ENDIANx 466 colors = swap(colors); 467 bits = swap(bits); 468 #endif 469 470 uint64_t alpha = ((uint64_t)alphahi << 32) | alphalo; 471 uint64_t alpha0 = alpha & 0xff; 472 alpha >>= 8; 473 uint64_t alpha1 = alpha & 0xff; 474 alpha >>= 8; 475 476 if (alpha0 != prev_alpha0 || alpha1 != prev_alpha1) { 477 prev_alpha0 = alpha0; 478 prev_alpha1 = alpha1; 479 480 a[0] = alpha0; 481 a[1] = alpha1; 482 int a01 = alpha0 + alpha1 - 1; 483 if (alpha0 > alpha1) { 484 a[2] = div7(6*alpha0 + alpha1); 485 a[4] = div7(4*alpha0 + 3*alpha1); 486 a[6] = div7(2*alpha0 + 5*alpha1); 487 488 // Use symmetry to derive half of the values 489 // A few values will be off by 1 (~.5%) 490 // Alternate which values are computed directly 491 // and which are derived to try to reduce bias 492 a[3] = a01 - a[6]; 493 a[5] = a01 - a[4]; 494 a[7] = a01 - a[2]; 495 } else { 496 a[2] = div5(4*alpha0 + alpha1); 497 a[4] = div5(2*alpha0 + 3*alpha1); 498 a[3] = a01 - a[4]; 499 a[5] = a01 - a[2]; 500 a[6] = 0x00; 501 a[7] = 0xff; 502 } 503 } 504 505 // Raw colors 506 uint16_t color0 = colors & 0xffff; 507 uint16_t color1 = colors >> 16; 508 509 // If the new block has the same base colors as the 510 // previous one, we don't need to recompute the color 511 // table c[] 512 if (color0 != prev_color0 || color1 != prev_color1) { 513 // Store raw colors for comparison with next block 514 prev_color0 = color0; 515 prev_color1 = color1; 516 517 int bbits = bits >> 1; 518 bool has2 = ((bbits & ~bits) & 0x55555555) != 0; 519 bool has3 = ((bbits & bits) & 0x55555555) != 0; 520 521 if (has2 || has3) { 522 int r0 = red(color0); 523 int g0 = green(color0); 524 int b0 = blue(color0); 525 526 int r1 = red(color1); 527 int g1 = green(color1); 528 int b1 = blue(color1); 529 530 int r2 = avg23(r0, r1); 531 int g2 = avg23(g0, g1); 532 int b2 = avg23(b0, b1); 533 534 int r3 = avg23(r1, r0); 535 int g3 = avg23(g1, g0); 536 int b3 = avg23(b1, b0); 537 538 c[0] = rgb565SepTo888(r0, g0, b0); 539 c[1] = rgb565SepTo888(r1, g1, b1); 540 c[2] = rgb565SepTo888(r2, g2, b2); 541 c[3] = rgb565SepTo888(r3, g3, b3); 542 } else { 543 // Convert to 8 bits 544 c[0] = rgb565To888(color0); 545 c[1] = rgb565To888(color1); 546 } 547 } 548 549 uint32_t* blockRowPtr = blockPtr; 550 for (int y = 0; y < 4; y++, blockRowPtr += stride) { 551 // Don't process rows past the botom 552 if (base_y + y >= height) { 553 break; 554 } 555 556 int w = min(width - base_x, 4); 557 for (int x = 0; x < w; x++) { 558 int acode = alpha & 0x7; 559 alpha >>= 3; 560 561 int code = bits & 0x3; 562 bits >>= 2; 563 564 blockRowPtr[x] = c[code] | (a[acode] << 24); 565 } 566 } 567 } 568 } 569 } 570 571 /* 572 * Decode a DXT-compressed texture into memory. DXT textures consist of 573 * a series of 4x4 pixel blocks in left-to-right, top-down order. 574 * The number of blocks is given by ceil(width/4)*ceil(height/4). 575 * 576 * 'data' points to the texture data. 'width' and 'height' indicate the 577 * dimensions of the texture. We assume width and height are >= 0 but 578 * do not require them to be powers of 2 or divisible by any factor. 579 * 580 * The output is written to 'surface' with each scanline separated by 581 * 'stride' 2- or 4-byte words. 582 * 583 * 'format' indicates the type of compression and must be one of the following: 584 * 585 * GL_COMPRESSED_RGB_S3TC_DXT1_EXT: 586 * The output is written as 5/6/5 opaque RGB (16 bit words). 587 * 8 bytes are read from 'data' for each block. 588 * 589 * GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 590 * The output is written as 5/5/5/1 RGBA (16 bit words) 591 * 8 bytes are read from 'data' for each block. 592 * 593 * GL_COMPRESSED_RGBA_S3TC_DXT3_EXT 594 * GL_COMPRESSED_RGBA_S3TC_DXT5_EXT 595 * The output is written as 8/8/8/8 ARGB (32 bit words) 596 * 16 bytes are read from 'data' for each block. 597 */ 598 void 599 decodeDXT(const GLvoid *data, int width, int height, 600 void *surface, int stride, int format) 601 { 602 #if TIMING 603 struct timeval start_t, end_t; 604 struct timezone tz; 605 606 gettimeofday(&start_t, &tz); 607 #endif 608 609 switch (format) { 610 case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: 611 decodeDXT1(data, width, height, surface, stride, false); 612 break; 613 614 case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: 615 decodeDXT1(data, width, height, surface, stride, true); 616 break; 617 618 case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: 619 decodeDXT3(data, width, height, surface, stride); 620 break; 621 622 case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: 623 decodeDXT5(data, width, height, surface, stride); 624 break; 625 } 626 627 #if TIMING 628 gettimeofday(&end_t, &tz); 629 long usec = (end_t.tv_sec - start_t.tv_sec)*1000000 + 630 (end_t.tv_usec - start_t.tv_usec); 631 632 printf("Loaded w=%d h=%d in %ld usec\n", width, height, usec); 633 #endif 634 } 635 636 } // namespace android 637