Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright 2014 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkTextureCompressor_LATC.h"
      9 #include "SkTextureCompressor_Blitter.h"
     10 #include "SkTextureCompressor_Utils.h"
     11 
     12 #include "SkBlitter.h"
     13 #include "SkEndian.h"
     14 
     15 // Compression options. In general, the slow version is much more accurate, but
     16 // much slower. The fast option is much faster, but much less accurate. YMMV.
     17 #define COMPRESS_LATC_SLOW 0
     18 #define COMPRESS_LATC_FAST 1
     19 
     20 ////////////////////////////////////////////////////////////////////////////////
     21 
     22 // Generates an LATC palette. LATC constructs
     23 // a palette of eight colors from LUM0 and LUM1 using the algorithm:
     24 //
     25 // LUM0,              if lum0 > lum1 and code(x,y) == 0
     26 // LUM1,              if lum0 > lum1 and code(x,y) == 1
     27 // (6*LUM0+  LUM1)/7, if lum0 > lum1 and code(x,y) == 2
     28 // (5*LUM0+2*LUM1)/7, if lum0 > lum1 and code(x,y) == 3
     29 // (4*LUM0+3*LUM1)/7, if lum0 > lum1 and code(x,y) == 4
     30 // (3*LUM0+4*LUM1)/7, if lum0 > lum1 and code(x,y) == 5
     31 // (2*LUM0+5*LUM1)/7, if lum0 > lum1 and code(x,y) == 6
     32 // (  LUM0+6*LUM1)/7, if lum0 > lum1 and code(x,y) == 7
     33 //
     34 // LUM0,              if lum0 <= lum1 and code(x,y) == 0
     35 // LUM1,              if lum0 <= lum1 and code(x,y) == 1
     36 // (4*LUM0+  LUM1)/5, if lum0 <= lum1 and code(x,y) == 2
     37 // (3*LUM0+2*LUM1)/5, if lum0 <= lum1 and code(x,y) == 3
     38 // (2*LUM0+3*LUM1)/5, if lum0 <= lum1 and code(x,y) == 4
     39 // (  LUM0+4*LUM1)/5, if lum0 <= lum1 and code(x,y) == 5
     40 // 0,                 if lum0 <= lum1 and code(x,y) == 6
     41 // 255,               if lum0 <= lum1 and code(x,y) == 7
     42 
     43 static const int kLATCPaletteSize = 8;
     44 static void generate_latc_palette(uint8_t palette[], uint8_t lum0, uint8_t lum1) {
     45     palette[0] = lum0;
     46     palette[1] = lum1;
     47     if (lum0 > lum1) {
     48         for (int i = 1; i < 7; i++) {
     49             palette[i+1] = ((7-i)*lum0 + i*lum1) / 7;
     50         }
     51     } else {
     52         for (int i = 1; i < 5; i++) {
     53             palette[i+1] = ((5-i)*lum0 + i*lum1) / 5;
     54         }
     55         palette[6] = 0;
     56         palette[7] = 255;
     57     }
     58 }
     59 
     60 ////////////////////////////////////////////////////////////////////////////////
     61 
     62 #if COMPRESS_LATC_SLOW
     63 
     64 ////////////////////////////////////////////////////////////////////////////////
     65 //
     66 // Utility Functions
     67 //
     68 ////////////////////////////////////////////////////////////////////////////////
     69 
     70 // Absolute difference between two values. More correct than SkTAbs(a - b)
     71 // because it works on unsigned values.
     72 template <typename T> inline T abs_diff(const T &a, const T &b) {
     73     return (a > b) ? (a - b) : (b - a);
     74 }
     75 
     76 static bool is_extremal(uint8_t pixel) {
     77     return 0 == pixel || 255 == pixel;
     78 }
     79 
     80 typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]);
     81 
     82 // This function is used by both R11 EAC and LATC to compress 4x4 blocks
     83 // of 8-bit alpha into 64-bit values that comprise the compressed data.
     84 // For both formats, we need to make sure that the dimensions of the
     85 // src pixels are divisible by 4, and copy 4x4 blocks one at a time
     86 // for compression.
     87 static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src,
     88                                      int width, int height, size_t rowBytes,
     89                                      A84x4To64BitProc proc) {
     90     // Make sure that our data is well-formed enough to be considered for compression
     91     if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) {
     92         return false;
     93     }
     94 
     95     int blocksX = width >> 2;
     96     int blocksY = height >> 2;
     97 
     98     uint8_t block[16];
     99     uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst);
    100     for (int y = 0; y < blocksY; ++y) {
    101         for (int x = 0; x < blocksX; ++x) {
    102             // Load block
    103             for (int k = 0; k < 4; ++k) {
    104                 memcpy(block + k*4, src + k*rowBytes + 4*x, 4);
    105             }
    106 
    107             // Compress it
    108             *encPtr = proc(block);
    109             ++encPtr;
    110         }
    111         src += 4 * rowBytes;
    112     }
    113 
    114     return true;
    115 }
    116 
    117 ////////////////////////////////////////////////////////////////////////////////
    118 //
    119 // LATC compressor
    120 //
    121 ////////////////////////////////////////////////////////////////////////////////
    122 
    123 // LATC compressed texels down into square 4x4 blocks
    124 static const int kLATCBlockSize = 4;
    125 static const int kLATCPixelsPerBlock = kLATCBlockSize * kLATCBlockSize;
    126 
    127 // Compress a block by using the bounding box of the pixels. It is assumed that
    128 // there are no extremal pixels in this block otherwise we would have used
    129 // compressBlockBBIgnoreExtremal.
    130 static uint64_t compress_latc_block_bb(const uint8_t pixels[]) {
    131     uint8_t minVal = 255;
    132     uint8_t maxVal = 0;
    133     for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
    134         minVal = SkTMin(pixels[i], minVal);
    135         maxVal = SkTMax(pixels[i], maxVal);
    136     }
    137 
    138     SkASSERT(!is_extremal(minVal));
    139     SkASSERT(!is_extremal(maxVal));
    140 
    141     uint8_t palette[kLATCPaletteSize];
    142     generate_latc_palette(palette, maxVal, minVal);
    143 
    144     uint64_t indices = 0;
    145     for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
    146 
    147         // Find the best palette index
    148         uint8_t bestError = abs_diff(pixels[i], palette[0]);
    149         uint8_t idx = 0;
    150         for (int j = 1; j < kLATCPaletteSize; ++j) {
    151             uint8_t error = abs_diff(pixels[i], palette[j]);
    152             if (error < bestError) {
    153                 bestError = error;
    154                 idx = j;
    155             }
    156         }
    157 
    158         indices <<= 3;
    159         indices |= idx;
    160     }
    161 
    162     return
    163         SkEndian_SwapLE64(
    164             static_cast<uint64_t>(maxVal) |
    165             (static_cast<uint64_t>(minVal) << 8) |
    166             (indices << 16));
    167 }
    168 
    169 // Compress a block by using the bounding box of the pixels without taking into
    170 // account the extremal values. The generated palette will contain extremal values
    171 // and fewer points along the line segment to interpolate.
    172 static uint64_t compress_latc_block_bb_ignore_extremal(const uint8_t pixels[]) {
    173     uint8_t minVal = 255;
    174     uint8_t maxVal = 0;
    175     for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
    176         if (is_extremal(pixels[i])) {
    177             continue;
    178         }
    179 
    180         minVal = SkTMin(pixels[i], minVal);
    181         maxVal = SkTMax(pixels[i], maxVal);
    182     }
    183 
    184     SkASSERT(!is_extremal(minVal));
    185     SkASSERT(!is_extremal(maxVal));
    186 
    187     uint8_t palette[kLATCPaletteSize];
    188     generate_latc_palette(palette, minVal, maxVal);
    189 
    190     uint64_t indices = 0;
    191     for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
    192 
    193         // Find the best palette index
    194         uint8_t idx = 0;
    195         if (is_extremal(pixels[i])) {
    196             if (0xFF == pixels[i]) {
    197                 idx = 7;
    198             } else if (0 == pixels[i]) {
    199                 idx = 6;
    200             } else {
    201                 SkFAIL("Pixel is extremal but not really?!");
    202             }
    203         } else {
    204             uint8_t bestError = abs_diff(pixels[i], palette[0]);
    205             for (int j = 1; j < kLATCPaletteSize - 2; ++j) {
    206                 uint8_t error = abs_diff(pixels[i], palette[j]);
    207                 if (error < bestError) {
    208                     bestError = error;
    209                     idx = j;
    210                 }
    211             }
    212         }
    213 
    214         indices <<= 3;
    215         indices |= idx;
    216     }
    217 
    218     return
    219         SkEndian_SwapLE64(
    220             static_cast<uint64_t>(minVal) |
    221             (static_cast<uint64_t>(maxVal) << 8) |
    222             (indices << 16));
    223 }
    224 
    225 
    226 // Compress LATC block. Each 4x4 block of pixels is decompressed by LATC from two
    227 // values LUM0 and LUM1, and an index into the generated palette. Details of how
    228 // the palette is generated can be found in the comments of generatePalette above.
    229 //
    230 // We choose which palette type to use based on whether or not 'pixels' contains
    231 // any extremal values (0 or 255). If there are extremal values, then we use the
    232 // palette that has the extremal values built in. Otherwise, we use the full bounding
    233 // box.
    234 
    235 static uint64_t compress_latc_block(const uint8_t pixels[]) {
    236     // Collect unique pixels
    237     int nUniquePixels = 0;
    238     uint8_t uniquePixels[kLATCPixelsPerBlock];
    239     for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
    240         bool foundPixel = false;
    241         for (int j = 0; j < nUniquePixels; ++j) {
    242             foundPixel = foundPixel || uniquePixels[j] == pixels[i];
    243         }
    244 
    245         if (!foundPixel) {
    246             uniquePixels[nUniquePixels] = pixels[i];
    247             ++nUniquePixels;
    248         }
    249     }
    250 
    251     // If there's only one unique pixel, then our compression is easy.
    252     if (1 == nUniquePixels) {
    253         return SkEndian_SwapLE64(pixels[0] | (pixels[0] << 8));
    254 
    255     // Similarly, if there are only two unique pixels, then our compression is
    256     // easy again: place the pixels in the block header, and assign the indices
    257     // with one or zero depending on which pixel they belong to.
    258     } else if (2 == nUniquePixels) {
    259         uint64_t outBlock = 0;
    260         for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
    261             int idx = 0;
    262             if (pixels[i] == uniquePixels[1]) {
    263                 idx = 1;
    264             }
    265 
    266             outBlock <<= 3;
    267             outBlock |= idx;
    268         }
    269         outBlock <<= 16;
    270         outBlock |= (uniquePixels[0] | (uniquePixels[1] << 8));
    271         return SkEndian_SwapLE64(outBlock);
    272     }
    273 
    274     // Count non-maximal pixel values
    275     int nonExtremalPixels = 0;
    276     for (int i = 0; i < nUniquePixels; ++i) {
    277         if (!is_extremal(uniquePixels[i])) {
    278             ++nonExtremalPixels;
    279         }
    280     }
    281 
    282     // If all the pixels are nonmaximal then compute the palette using
    283     // the bounding box of all the pixels.
    284     if (nonExtremalPixels == nUniquePixels) {
    285         // This is really just for correctness, in all of my tests we
    286         // never take this step. We don't lose too much perf here because
    287         // most of the processing in this function is worth it for the
    288         // 1 == nUniquePixels optimization.
    289         return compress_latc_block_bb(pixels);
    290     } else {
    291         return compress_latc_block_bb_ignore_extremal(pixels);
    292     }
    293 }
    294 
    295 #endif  // COMPRESS_LATC_SLOW
    296 
    297 ////////////////////////////////////////////////////////////////////////////////
    298 
    299 #if COMPRESS_LATC_FAST
    300 
    301 // Take the top three bits of each index and pack them into the low 12
    302 // bits of the integer.
    303 static inline uint32_t pack_index(uint32_t x) {
    304     // Pack it in...
    305 #if defined (SK_CPU_BENDIAN)
    306     return
    307         (x >> 24) |
    308         ((x >> 13) & 0x38) |
    309         ((x >> 2) & 0x1C0) |
    310         ((x << 9) & 0xE00);
    311 #else
    312     return
    313         (x & 0x7) |
    314         ((x >> 5) & 0x38) |
    315         ((x >> 10) & 0x1C0) |
    316         ((x >> 15) & 0xE00);
    317 #endif
    318 }
    319 
    320 // Converts each 8-bit byte in the integer into an LATC index, and then packs
    321 // the indices into the low 12 bits of the integer.
    322 static inline uint32_t convert_index(uint32_t x) {
    323     // Since the palette is
    324     // 255, 0, 219, 182, 146, 109, 73, 36
    325     // we need to map the high three bits of each byte in the integer
    326     // from
    327     // 0 1 2 3 4 5 6 7
    328     // to
    329     // 1 7 6 5 4 3 2 0
    330     //
    331     // This first operation takes the mapping from
    332     // 0 1 2 3 4 5 6 7  -->  7 6 5 4 3 2 1 0
    333     x = 0x07070707 - SkTextureCompressor::ConvertToThreeBitIndex(x);
    334 
    335     // mask is 1 if index is non-zero
    336     const uint32_t mask = (x | (x >> 1) | (x >> 2)) & 0x01010101;
    337 
    338     // add mask:
    339     // 7 6 5 4 3 2 1 0 --> 8 7 6 5 4 3 2 0
    340     x = (x + mask);
    341 
    342     // Handle overflow:
    343     // 8 7 6 5 4 3 2 0 --> 9 7 6 5 4 3 2 0
    344     x |= (x >> 3) & 0x01010101;
    345 
    346     // Mask out high bits:
    347     // 9 7 6 5 4 3 2 0 --> 1 7 6 5 4 3 2 0
    348     x &= 0x07070707;
    349 
    350     return pack_index(x);
    351 }
    352 
    353 typedef uint64_t (*PackIndicesProc)(const uint8_t* alpha, size_t rowBytes);
    354 template<PackIndicesProc packIndicesProc>
    355 static void compress_a8_latc_block(uint8_t** dstPtr, const uint8_t* src, size_t rowBytes) {
    356     *(reinterpret_cast<uint64_t*>(*dstPtr)) =
    357         SkEndian_SwapLE64(0xFF | (packIndicesProc(src, rowBytes) << 16));
    358     *dstPtr += 8;
    359 }
    360 
    361 inline uint64_t PackRowMajor(const uint8_t *indices, size_t rowBytes) {
    362     uint64_t result = 0;
    363     for (int i = 0; i < 4; ++i) {
    364         const uint32_t idx = *(reinterpret_cast<const uint32_t*>(indices + i*rowBytes));
    365         result |= static_cast<uint64_t>(convert_index(idx)) << 12*i;
    366     }
    367     return result;
    368 }
    369 
    370 inline uint64_t PackColumnMajor(const uint8_t *indices, size_t rowBytes) {
    371     // !SPEED! Blarg, this is kind of annoying. SSE4 can make this
    372     // a LOT faster.
    373     uint8_t transposed[16];
    374     for (int i = 0; i < 4; ++i) {
    375         for (int j = 0; j < 4; ++j) {
    376             transposed[j*4+i] = indices[i*rowBytes + j];
    377         }
    378     }
    379 
    380     return PackRowMajor(transposed, 4);
    381 }
    382 
    383 static bool compress_4x4_a8_latc(uint8_t* dst, const uint8_t* src,
    384                                  int width, int height, size_t rowBytes) {
    385 
    386     if (width < 0 || ((width % 4) != 0) || height < 0 || ((height % 4) != 0)) {
    387         return false;
    388     }
    389 
    390     uint8_t** dstPtr = &dst;
    391     for (int y = 0; y < height; y += 4) {
    392         for (int x = 0; x < width; x += 4) {
    393             compress_a8_latc_block<PackRowMajor>(dstPtr, src + y*rowBytes + x, rowBytes);
    394         }
    395     }
    396 
    397     return true;
    398 }
    399 
    400 void CompressA8LATCBlockVertical(uint8_t* dst, const uint8_t block[]) {
    401     compress_a8_latc_block<PackColumnMajor>(&dst, block, 4);
    402 }
    403 
    404 #endif  // COMPRESS_LATC_FAST
    405 
    406 void decompress_latc_block(uint8_t* dst, int dstRowBytes, const uint8_t* src) {
    407     uint64_t block = SkEndian_SwapLE64(*(reinterpret_cast<const uint64_t *>(src)));
    408     uint8_t lum0 = block & 0xFF;
    409     uint8_t lum1 = (block >> 8) & 0xFF;
    410 
    411     uint8_t palette[kLATCPaletteSize];
    412     generate_latc_palette(palette, lum0, lum1);
    413 
    414     block >>= 16;
    415     for (int j = 0; j < 4; ++j) {
    416         for (int i = 0; i < 4; ++i) {
    417             dst[i] = palette[block & 0x7];
    418             block >>= 3;
    419         }
    420         dst += dstRowBytes;
    421     }
    422 }
    423 
    424 // This is the type passed as the CompressorType argument of the compressed
    425 // blitter for the LATC format. The static functions required to be in this
    426 // struct are documented in SkTextureCompressor_Blitter.h
    427 struct CompressorLATC {
    428     static inline void CompressA8Vertical(uint8_t* dst, const uint8_t block[]) {
    429         compress_a8_latc_block<PackColumnMajor>(&dst, block, 4);
    430     }
    431 
    432     static inline void CompressA8Horizontal(uint8_t* dst, const uint8_t* src,
    433                                             int srcRowBytes) {
    434         compress_a8_latc_block<PackRowMajor>(&dst, src, srcRowBytes);
    435     }
    436 
    437 #if PEDANTIC_BLIT_RECT
    438     static inline void UpdateBlock(uint8_t* dst, const uint8_t* src, int srcRowBytes,
    439                                    const uint8_t* mask) {
    440         // Pack the mask
    441         uint64_t cmpMask = 0;
    442         for (int i = 0; i < 4; ++i) {
    443             const uint32_t idx = *(reinterpret_cast<const uint32_t*>(src + i*srcRowBytes));
    444             cmpMask |= static_cast<uint64_t>(pack_index(idx)) << 12*i;
    445         }
    446         cmpMask = SkEndian_SwapLE64(cmpMask << 16); // avoid header
    447 
    448         uint64_t cmpSrc;
    449         uint8_t *cmpSrcPtr = reinterpret_cast<uint8_t*>(&cmpSrc);
    450         compress_a8_latc_block<PackRowMajor>(&cmpSrcPtr, src, srcRowBytes);
    451 
    452         // Mask out header
    453         cmpSrc = cmpSrc & cmpMask;
    454 
    455         // Read destination encoding
    456         uint64_t *cmpDst = reinterpret_cast<uint64_t*>(dst);
    457 
    458         // If the destination is the encoding for a blank block, then we need
    459         // to properly set the header
    460         if (0 == cmpDst) {
    461             *cmpDst = SkTEndian_SwapLE64(0x24924924924900FFULL);
    462         }
    463 
    464         // Set the new indices
    465         *cmpDst &= ~cmpMask;
    466         *cmpDst |= cmpSrc;
    467     }
    468 #endif  // PEDANTIC_BLIT_RECT
    469 };
    470 
    471 ////////////////////////////////////////////////////////////////////////////////
    472 
    473 namespace SkTextureCompressor {
    474 
    475 bool CompressA8ToLATC(uint8_t* dst, const uint8_t* src, int width, int height, size_t rowBytes) {
    476 #if COMPRESS_LATC_FAST
    477     return compress_4x4_a8_latc(dst, src, width, height, rowBytes);
    478 #elif COMPRESS_LATC_SLOW
    479     return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_latc_block);
    480 #else
    481 #error "Must choose either fast or slow LATC compression"
    482 #endif
    483 }
    484 
    485 SkBlitter* CreateLATCBlitter(int width, int height, void* outputBuffer,
    486                              SkTBlitterAllocator* allocator) {
    487     if ((width % 4) != 0 || (height % 4) != 0) {
    488         return nullptr;
    489     }
    490 
    491 #if COMPRESS_LATC_FAST
    492     // Memset the output buffer to an encoding that decodes to zero. We must do this
    493     // in order to avoid having uninitialized values in the buffer if the blitter
    494     // decides not to write certain scanlines (and skip entire rows of blocks).
    495     // In the case of LATC, if everything is zero, then LUM0 and LUM1 are also zero,
    496     // and they will only be non-zero (0xFF) if the index is 7. So bzero will do just fine.
    497     // (8 bytes per block) * (w * h / 16 blocks) = w * h / 2
    498     sk_bzero(outputBuffer, width * height / 2);
    499 
    500     return allocator->createT<
    501         SkTCompressedAlphaBlitter<4, 8, CompressorLATC>, int, int, void* >
    502         (width, height, outputBuffer);
    503 #elif COMPRESS_LATC_SLOW
    504     // TODO (krajcevski)
    505     return nullptr;
    506 #endif
    507 }
    508 
    509 void DecompressLATC(uint8_t* dst, int dstRowBytes, const uint8_t* src, int width, int height) {
    510     for (int j = 0; j < height; j += 4) {
    511         for (int i = 0; i < width; i += 4) {
    512             decompress_latc_block(dst + i, dstRowBytes, src);
    513             src += 8;
    514         }
    515         dst += 4 * dstRowBytes;
    516     }
    517 }
    518 
    519 }  // SkTextureCompressor
    520