// utils/SkTextureCompressor_LATC.cpp
      1 /*
      2  * Copyright 2014 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
#include "SkTextureCompressor_LATC.h"
#include "SkTextureCompressor_Blitter.h"

#include "SkBlitter.h"
#include "SkEndian.h"

#include <string.h>  // memcpy
     13 
     14 // Compression options. In general, the slow version is much more accurate, but
     15 // much slower. The fast option is much faster, but much less accurate. YMMV.
     16 #define COMPRESS_LATC_SLOW 0
     17 #define COMPRESS_LATC_FAST 1
     18 
     19 ////////////////////////////////////////////////////////////////////////////////
     20 
     21 // Generates an LATC palette. LATC constructs
     22 // a palette of eight colors from LUM0 and LUM1 using the algorithm:
     23 //
     24 // LUM0,              if lum0 > lum1 and code(x,y) == 0
     25 // LUM1,              if lum0 > lum1 and code(x,y) == 1
     26 // (6*LUM0+  LUM1)/7, if lum0 > lum1 and code(x,y) == 2
     27 // (5*LUM0+2*LUM1)/7, if lum0 > lum1 and code(x,y) == 3
     28 // (4*LUM0+3*LUM1)/7, if lum0 > lum1 and code(x,y) == 4
     29 // (3*LUM0+4*LUM1)/7, if lum0 > lum1 and code(x,y) == 5
     30 // (2*LUM0+5*LUM1)/7, if lum0 > lum1 and code(x,y) == 6
     31 // (  LUM0+6*LUM1)/7, if lum0 > lum1 and code(x,y) == 7
     32 //
     33 // LUM0,              if lum0 <= lum1 and code(x,y) == 0
     34 // LUM1,              if lum0 <= lum1 and code(x,y) == 1
     35 // (4*LUM0+  LUM1)/5, if lum0 <= lum1 and code(x,y) == 2
     36 // (3*LUM0+2*LUM1)/5, if lum0 <= lum1 and code(x,y) == 3
     37 // (2*LUM0+3*LUM1)/5, if lum0 <= lum1 and code(x,y) == 4
     38 // (  LUM0+4*LUM1)/5, if lum0 <= lum1 and code(x,y) == 5
     39 // 0,                 if lum0 <= lum1 and code(x,y) == 6
     40 // 255,               if lum0 <= lum1 and code(x,y) == 7
     41 
     42 static const int kLATCPaletteSize = 8;
     43 static void generate_latc_palette(uint8_t palette[], uint8_t lum0, uint8_t lum1) {
     44     palette[0] = lum0;
     45     palette[1] = lum1;
     46     if (lum0 > lum1) {
     47         for (int i = 1; i < 7; i++) {
     48             palette[i+1] = ((7-i)*lum0 + i*lum1) / 7;
     49         }
     50     } else {
     51         for (int i = 1; i < 5; i++) {
     52             palette[i+1] = ((5-i)*lum0 + i*lum1) / 5;
     53         }
     54         palette[6] = 0;
     55         palette[7] = 255;
     56     }
     57 }
     58 
     59 ////////////////////////////////////////////////////////////////////////////////
     60 
     61 #if COMPRESS_LATC_SLOW
     62 
     63 ////////////////////////////////////////////////////////////////////////////////
     64 //
     65 // Utility Functions
     66 //
     67 ////////////////////////////////////////////////////////////////////////////////
     68 
     69 // Absolute difference between two values. More correct than SkTAbs(a - b)
     70 // because it works on unsigned values.
     71 template <typename T> inline T abs_diff(const T &a, const T &b) {
     72     return (a > b) ? (a - b) : (b - a);
     73 }
     74 
     75 static bool is_extremal(uint8_t pixel) {
     76     return 0 == pixel || 255 == pixel;
     77 }
     78 
     79 typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]);
     80 
     81 // This function is used by both R11 EAC and LATC to compress 4x4 blocks
     82 // of 8-bit alpha into 64-bit values that comprise the compressed data.
     83 // For both formats, we need to make sure that the dimensions of the
     84 // src pixels are divisible by 4, and copy 4x4 blocks one at a time
     85 // for compression.
     86 static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src,
     87                                      int width, int height, int rowBytes,
     88                                      A84x4To64BitProc proc) {
     89     // Make sure that our data is well-formed enough to be considered for compression
     90     if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) {
     91         return false;
     92     }
     93 
     94     int blocksX = width >> 2;
     95     int blocksY = height >> 2;
     96 
     97     uint8_t block[16];
     98     uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst);
     99     for (int y = 0; y < blocksY; ++y) {
    100         for (int x = 0; x < blocksX; ++x) {
    101             // Load block
    102             for (int k = 0; k < 4; ++k) {
    103                 memcpy(block + k*4, src + k*rowBytes + 4*x, 4);
    104             }
    105 
    106             // Compress it
    107             *encPtr = proc(block);
    108             ++encPtr;
    109         }
    110         src += 4 * rowBytes;
    111     }
    112 
    113     return true;
    114 }
    115 
    116 ////////////////////////////////////////////////////////////////////////////////
    117 //
    118 // LATC compressor
    119 //
    120 ////////////////////////////////////////////////////////////////////////////////
    121 
    122 // LATC compressed texels down into square 4x4 blocks
    123 static const int kLATCBlockSize = 4;
    124 static const int kLATCPixelsPerBlock = kLATCBlockSize * kLATCBlockSize;
    125 
    126 // Compress a block by using the bounding box of the pixels. It is assumed that
    127 // there are no extremal pixels in this block otherwise we would have used
    128 // compressBlockBBIgnoreExtremal.
    129 static uint64_t compress_latc_block_bb(const uint8_t pixels[]) {
    130     uint8_t minVal = 255;
    131     uint8_t maxVal = 0;
    132     for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
    133         minVal = SkTMin(pixels[i], minVal);
    134         maxVal = SkTMax(pixels[i], maxVal);
    135     }
    136 
    137     SkASSERT(!is_extremal(minVal));
    138     SkASSERT(!is_extremal(maxVal));
    139 
    140     uint8_t palette[kLATCPaletteSize];
    141     generate_latc_palette(palette, maxVal, minVal);
    142 
    143     uint64_t indices = 0;
    144     for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
    145 
    146         // Find the best palette index
    147         uint8_t bestError = abs_diff(pixels[i], palette[0]);
    148         uint8_t idx = 0;
    149         for (int j = 1; j < kLATCPaletteSize; ++j) {
    150             uint8_t error = abs_diff(pixels[i], palette[j]);
    151             if (error < bestError) {
    152                 bestError = error;
    153                 idx = j;
    154             }
    155         }
    156 
    157         indices <<= 3;
    158         indices |= idx;
    159     }
    160 
    161     return
    162         SkEndian_SwapLE64(
    163             static_cast<uint64_t>(maxVal) |
    164             (static_cast<uint64_t>(minVal) << 8) |
    165             (indices << 16));
    166 }
    167 
// Compress a block by using the bounding box of the non-extremal pixels.
// Extremal values (0 and 255) are excluded from the bounding box and are
// instead encoded directly via the two dedicated palette slots (6 and 7) of
// the lum0 <= lum1 palette mode, so the generated palette interpolates fewer
// points along the [minVal, maxVal] segment.
static uint64_t compress_latc_block_bb_ignore_extremal(const uint8_t pixels[]) {
    // Bounding box over non-extremal pixels only. The caller
    // (compress_latc_block) only reaches here with >= 3 unique values, of
    // which at most two can be extremal, so minVal/maxVal always get updated.
    uint8_t minVal = 255;
    uint8_t maxVal = 0;
    for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
        if (is_extremal(pixels[i])) {
            continue;
        }

        minVal = SkTMin(pixels[i], minVal);
        maxVal = SkTMax(pixels[i], maxVal);
    }

    SkASSERT(!is_extremal(minVal));
    SkASSERT(!is_extremal(maxVal));

    // minVal first => lum0 <= lum1, selecting the palette mode whose slots
    // 6 and 7 are the constants 0 and 255.
    uint8_t palette[kLATCPaletteSize];
    generate_latc_palette(palette, minVal, maxVal);

    // Pack one 3-bit index per pixel; iterating backwards while shifting
    // left puts pixel 0's index in the lowest bits.
    uint64_t indices = 0;
    for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {

        // Extremal pixels map straight to the dedicated palette slots.
        uint8_t idx = 0;
        if (is_extremal(pixels[i])) {
            if (0xFF == pixels[i]) {
                idx = 7;
            } else if (0 == pixels[i]) {
                idx = 6;
            } else {
                SkFAIL("Pixel is extremal but not really?!");
            }
        } else {
            // Otherwise pick the closest of the interpolated entries
            // (indices 0-5; 6 and 7 are reserved for 0 and 255).
            uint8_t bestError = abs_diff(pixels[i], palette[0]);
            for (int j = 1; j < kLATCPaletteSize - 2; ++j) {
                uint8_t error = abs_diff(pixels[i], palette[j]);
                if (error < bestError) {
                    bestError = error;
                    idx = j;
                }
            }
        }

        indices <<= 3;
        indices |= idx;
    }

    // Header: LUM0 = minVal, LUM1 = maxVal, then the 48 index bits.
    return
        SkEndian_SwapLE64(
            static_cast<uint64_t>(minVal) |
            (static_cast<uint64_t>(maxVal) << 8) |
            (indices << 16));
}
    223 
    224 
// Compress LATC block. Each 4x4 block of pixels is decompressed by LATC from
// two values LUM0 and LUM1, and an index into the generated palette. Details
// of how the palette is generated can be found in the comments of
// generate_latc_palette above.
//
// We choose which palette type to use based on whether or not 'pixels'
// contains any extremal values (0 or 255). If there are extremal values, then
// we use the palette that has the extremal values built in. Otherwise, we use
// the full bounding box.

static uint64_t compress_latc_block(const uint8_t pixels[]) {
    // Collect unique pixels (O(n^2), but n is only 16).
    int nUniquePixels = 0;
    uint8_t uniquePixels[kLATCPixelsPerBlock];
    for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
        bool foundPixel = false;
        for (int j = 0; j < nUniquePixels; ++j) {
            foundPixel = foundPixel || uniquePixels[j] == pixels[i];
        }

        if (!foundPixel) {
            uniquePixels[nUniquePixels] = pixels[i];
            ++nUniquePixels;
        }
    }

    // If there's only one unique pixel, then our compression is easy:
    // LUM0 == LUM1 == the pixel, every index is 0, and index 0 decodes to
    // LUM0 in either palette mode.
    if (1 == nUniquePixels) {
        return SkEndian_SwapLE64(pixels[0] | (pixels[0] << 8));

    // Similarly, if there are only two unique pixels, then our compression is
    // easy again: place the pixels in the block header, and assign the indices
    // with one or zero depending on which pixel they belong to.
    } else if (2 == nUniquePixels) {
        uint64_t outBlock = 0;
        // Iterate backwards while shifting left so pixel 0's index ends up
        // in the lowest bits.
        for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
            int idx = 0;
            if (pixels[i] == uniquePixels[1]) {
                idx = 1;
            }

            outBlock <<= 3;
            outBlock |= idx;
        }
        // Make room for the two header bytes below the 48 index bits.
        outBlock <<= 16;
        outBlock |= (uniquePixels[0] | (uniquePixels[1] << 8));
        return SkEndian_SwapLE64(outBlock);
    }

    // Count non-maximal pixel values
    int nonExtremalPixels = 0;
    for (int i = 0; i < nUniquePixels; ++i) {
        if (!is_extremal(uniquePixels[i])) {
            ++nonExtremalPixels;
        }
    }

    // If all the pixels are nonmaximal then compute the palette using
    // the bounding box of all the pixels.
    if (nonExtremalPixels == nUniquePixels) {
        // This is really just for correctness, in all of my tests we
        // never take this step. We don't lose too much perf here because
        // most of the processing in this function is worth it for the
        // 1 == nUniquePixels optimization.
        return compress_latc_block_bb(pixels);
    } else {
        return compress_latc_block_bb_ignore_extremal(pixels);
    }
}
    293 
    294 #endif  // COMPRESS_LATC_SLOW
    295 
    296 ////////////////////////////////////////////////////////////////////////////////
    297 
    298 #if COMPRESS_LATC_FAST
    299 
    300 // Take the top three bits of each index and pack them into the low 12
    301 // bits of the integer.
    302 static inline uint32_t pack_index(uint32_t x) {
    303     // Pack it in...
    304 #if defined (SK_CPU_BENDIAN)
    305     return
    306         (x >> 24) |
    307         ((x >> 13) & 0x38) |
    308         ((x >> 2) & 0x1C0) |
    309         ((x << 9) & 0xE00);
    310 #else
    311     return
    312         (x & 0x7) |
    313         ((x >> 5) & 0x38) |
    314         ((x >> 10) & 0x1C0) |
    315         ((x >> 15) & 0xE00);
    316 #endif
    317 }
    318 
// Converts each 8-bit alpha value in |x| (four pixels, one per byte) into a
// 3-bit LATC palette index, then packs the four indices into the low 12 bits
// of the result via pack_index.
static inline uint32_t convert_index(uint32_t x) {
    // The fast path always writes the header LUM0 = 255, LUM1 = 0 (see
    // compress_a8_latc_block), so the decoder's palette is approximately
    // (exact values depend on the decoder's rounding):
    // 255, 0, 219, 182, 146, 109, 73, 36
    // We therefore need to map the high three bits of each byte
    // (i.e. alpha / 32) from
    // 0 1 2 3 4 5 6 7
    // to the index of the closest palette entry:
    // 1 7 6 5 4 3 2 0
    //
    // This first operation inverts the ordering:
    // 0 1 2 3 4 5 6 7  -->  7 6 5 4 3 2 1 0
    x = 0x07070707 - ((x >> 5) & 0x07070707);

    // Per-byte flag: 1 in the low bit of each byte whose index is non-zero.
    const uint32_t mask = (x | (x >> 1) | (x >> 2)) & 0x01010101;

    // Bump every non-zero index up by one:
    // 7 6 5 4 3 2 1 0 --> 8 7 6 5 4 3 2 0
    x = (x + mask);

    // 8 (0b1000) overflows three bits; fold its high bit back down,
    // turning it into 9 (0b1001):
    // 8 7 6 5 4 3 2 0 --> 9 7 6 5 4 3 2 0
    x |= (x >> 3) & 0x01010101;

    // Mask back to three bits, so the 9 becomes the desired 1:
    // 9 7 6 5 4 3 2 0 --> 1 7 6 5 4 3 2 0
    x &= 0x07070707;

    return pack_index(x);
}
    351 
typedef uint64_t (*PackIndicesProc)(const uint8_t* alpha, int rowBytes);

// Compresses one 4x4 block of 8-bit alpha at |src| (row stride |rowBytes|)
// into 8 bytes at *dstPtr, then advances *dstPtr past the block. The header
// is fixed at LUM0 = 0xFF, LUM1 = 0x00; the index bits come from
// |packIndicesProc| (occupying bits 16..63 before the little-endian swap).
// NOTE(review): the store assumes *dstPtr is suitably aligned for uint64_t
// -- confirm callers only pass 8-byte-aligned output.
template<PackIndicesProc packIndicesProc>
static void compress_a8_latc_block(uint8_t** dstPtr, const uint8_t* src, int rowBytes) {
    *(reinterpret_cast<uint64_t*>(*dstPtr)) =
        SkEndian_SwapLE64(0xFF | (packIndicesProc(src, rowBytes) << 16));
    *dstPtr += 8;
}
    359 
    360 inline uint64_t PackRowMajor(const uint8_t *indices, int rowBytes) {
    361     uint64_t result = 0;
    362     for (int i = 0; i < 4; ++i) {
    363         const uint32_t idx = *(reinterpret_cast<const uint32_t*>(indices + i*rowBytes));
    364         result |= static_cast<uint64_t>(convert_index(idx)) << 12*i;
    365     }
    366     return result;
    367 }
    368 
    369 inline uint64_t PackColumnMajor(const uint8_t *indices, int rowBytes) {
    370     // !SPEED! Blarg, this is kind of annoying. SSE4 can make this
    371     // a LOT faster.
    372     uint8_t transposed[16];
    373     for (int i = 0; i < 4; ++i) {
    374         for (int j = 0; j < 4; ++j) {
    375             transposed[j*4+i] = indices[i*rowBytes + j];
    376         }
    377     }
    378 
    379     return PackRowMajor(transposed, 4);
    380 }
    381 
    382 static bool compress_4x4_a8_latc(uint8_t* dst, const uint8_t* src,
    383                                  int width, int height, int rowBytes) {
    384 
    385     if (width < 0 || ((width % 4) != 0) || height < 0 || ((height % 4) != 0)) {
    386         return false;
    387     }
    388 
    389     uint8_t** dstPtr = &dst;
    390     for (int y = 0; y < height; y += 4) {
    391         for (int x = 0; x < width; x += 4) {
    392             compress_a8_latc_block<PackRowMajor>(dstPtr, src + y*rowBytes + x, rowBytes);
    393         }
    394     }
    395 
    396     return true;
    397 }
    398 
// Compresses a single tightly-packed (stride 4) 4x4 block of alpha values,
// stored column-major, into 8 bytes at |dst|.
void CompressA8LATCBlockVertical(uint8_t* dst, const uint8_t block[]) {
    compress_a8_latc_block<PackColumnMajor>(&dst, block, 4);
}
    402 
    403 #endif  // COMPRESS_LATC_FAST
    404 
    405 void decompress_latc_block(uint8_t* dst, int dstRowBytes, const uint8_t* src) {
    406     uint64_t block = SkEndian_SwapLE64(*(reinterpret_cast<const uint64_t *>(src)));
    407     uint8_t lum0 = block & 0xFF;
    408     uint8_t lum1 = (block >> 8) & 0xFF;
    409 
    410     uint8_t palette[kLATCPaletteSize];
    411     generate_latc_palette(palette, lum0, lum1);
    412 
    413     block >>= 16;
    414     for (int j = 0; j < 4; ++j) {
    415         for (int i = 0; i < 4; ++i) {
    416             dst[i] = palette[block & 0x7];
    417             block >>= 3;
    418         }
    419         dst += dstRowBytes;
    420     }
    421 }
    422 
    423 // This is the type passed as the CompressorType argument of the compressed
    424 // blitter for the LATC format. The static functions required to be in this
    425 // struct are documented in SkTextureCompressor_Blitter.h
    426 struct CompressorLATC {
    427     static inline void CompressA8Vertical(uint8_t* dst, const uint8_t block[]) {
    428         compress_a8_latc_block<PackColumnMajor>(&dst, block, 4);
    429     }
    430 
    431     static inline void CompressA8Horizontal(uint8_t* dst, const uint8_t* src,
    432                                             int srcRowBytes) {
    433         compress_a8_latc_block<PackRowMajor>(&dst, src, srcRowBytes);
    434     }
    435 
    436 #if PEDANTIC_BLIT_RECT
    437     static inline void UpdateBlock(uint8_t* dst, const uint8_t* src, int srcRowBytes,
    438                                    const uint8_t* mask) {
    439         // Pack the mask
    440         uint64_t cmpMask = 0;
    441         for (int i = 0; i < 4; ++i) {
    442             const uint32_t idx = *(reinterpret_cast<const uint32_t*>(src + i*srcRowBytes));
    443             cmpMask |= static_cast<uint64_t>(pack_index(idx)) << 12*i;
    444         }
    445         cmpMask = SkEndian_SwapLE64(cmpMask << 16); // avoid header
    446 
    447         uint64_t cmpSrc;
    448         uint8_t *cmpSrcPtr = reinterpret_cast<uint8_t*>(&cmpSrc);
    449         compress_a8_latc_block<PackRowMajor>(&cmpSrcPtr, src, srcRowBytes);
    450 
    451         // Mask out header
    452         cmpSrc = cmpSrc & cmpMask;
    453 
    454         // Read destination encoding
    455         uint64_t *cmpDst = reinterpret_cast<uint64_t*>(dst);
    456 
    457         // If the destination is the encoding for a blank block, then we need
    458         // to properly set the header
    459         if (0 == cmpDst) {
    460             *cmpDst = SkTEndian_SwapLE64(0x24924924924900FFULL);
    461         }
    462 
    463         // Set the new indices
    464         *cmpDst &= ~cmpMask;
    465         *cmpDst |= cmpSrc;
    466     }
    467 #endif  // PEDANTIC_BLIT_RECT
    468 };
    469 
    470 ////////////////////////////////////////////////////////////////////////////////
    471 
namespace SkTextureCompressor {

// Compresses a width x height 8-bit alpha image (source stride |rowBytes|)
// into LATC data at |dst|. Returns false if either dimension is not a
// multiple of four. |dst| must hold width * height / 2 bytes (8 bytes per
// 4x4 block).
bool CompressA8ToLATC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) {
#if COMPRESS_LATC_FAST
    return compress_4x4_a8_latc(dst, src, width, height, rowBytes);
#elif COMPRESS_LATC_SLOW
    return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_latc_block);
#else
#error "Must choose either fast or slow LATC compression"
#endif
}

// Creates a blitter that writes LATC-compressed coverage into
// |outputBuffer|. Returns NULL if the dimensions are not multiples of four
// (LATC works on 4x4 blocks), or -- in the slow-compression configuration --
// because no blitter is implemented yet.
SkBlitter* CreateLATCBlitter(int width, int height, void* outputBuffer,
                             SkTBlitterAllocator* allocator) {
    if ((width % 4) != 0 || (height % 4) != 0) {
        return NULL;
    }

#if COMPRESS_LATC_FAST
    // Memset the output buffer to an encoding that decodes to zero. We must do this
    // in order to avoid having uninitialized values in the buffer if the blitter
    // decides not to write certain scanlines (and skip entire rows of blocks).
    // In the case of LATC, if everything is zero, then LUM0 and LUM1 are also zero,
    // and they will only be non-zero (0xFF) if the index is 7. So bzero will do just fine.
    // (8 bytes per block) * (w * h / 16 blocks) = w * h / 2
    sk_bzero(outputBuffer, width * height / 2);

    return allocator->createT<
        SkTCompressedAlphaBlitter<4, 8, CompressorLATC>, int, int, void* >
        (width, height, outputBuffer);
#elif COMPRESS_LATC_SLOW
    // TODO (krajcevski)
    return NULL;
#endif
}

// Decompresses LATC data at |src| into a width x height 8-bit image at
// |dst| (row stride |dstRowBytes|), one 8-byte block per 4x4 tile.
// Assumes width and height are multiples of four -- TODO confirm callers
// guarantee this (no check is performed here).
void DecompressLATC(uint8_t* dst, int dstRowBytes, const uint8_t* src, int width, int height) {
    for (int j = 0; j < height; j += 4) {
        for (int i = 0; i < width; i += 4) {
            decompress_latc_block(dst + i, dstRowBytes, src);
            src += 8;
        }
        dst += 4 * dstRowBytes;
    }
}

}  // SkTextureCompressor
    519