Home | History | Annotate | Download | only in dsp
      1 // Copyright 2013 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // Utilities for processing transparent channel.
     11 //
     12 // Author: Skal (pascal.massimino (at) gmail.com)
     13 
     14 #include <assert.h>
     15 #include "./dsp.h"
     16 
// Tables can be faster on some platforms but incur some extra binary size (~2k).
     18 // #define USE_TABLES_FOR_ALPHA_MULT
     19 
     20 // -----------------------------------------------------------------------------
     21 
     22 #define MFIX 24    // 24bit fixed-point arithmetic
     23 #define HALF ((1u << MFIX) >> 1)
     24 #define KINV_255 ((1u << MFIX) / 255u)
     25 
     26 static uint32_t Mult(uint8_t x, uint32_t mult) {
     27   const uint32_t v = (x * mult + HALF) >> MFIX;
     28   assert(v <= 255);  // <- 24bit precision is enough to ensure that.
     29   return v;
     30 }
     31 
     32 #ifdef USE_TABLES_FOR_ALPHA_MULT
     33 
     34 static const uint32_t kMultTables[2][256] = {
     35   {    // (255u << MFIX) / alpha
     36     0x00000000, 0xff000000, 0x7f800000, 0x55000000, 0x3fc00000, 0x33000000,
     37     0x2a800000, 0x246db6db, 0x1fe00000, 0x1c555555, 0x19800000, 0x172e8ba2,
     38     0x15400000, 0x139d89d8, 0x1236db6d, 0x11000000, 0x0ff00000, 0x0f000000,
     39     0x0e2aaaaa, 0x0d6bca1a, 0x0cc00000, 0x0c249249, 0x0b9745d1, 0x0b1642c8,
     40     0x0aa00000, 0x0a333333, 0x09cec4ec, 0x0971c71c, 0x091b6db6, 0x08cb08d3,
     41     0x08800000, 0x0839ce73, 0x07f80000, 0x07ba2e8b, 0x07800000, 0x07492492,
     42     0x07155555, 0x06e45306, 0x06b5e50d, 0x0689d89d, 0x06600000, 0x063831f3,
     43     0x06124924, 0x05ee23b8, 0x05cba2e8, 0x05aaaaaa, 0x058b2164, 0x056cefa8,
     44     0x05500000, 0x05343eb1, 0x05199999, 0x05000000, 0x04e76276, 0x04cfb2b7,
     45     0x04b8e38e, 0x04a2e8ba, 0x048db6db, 0x0479435e, 0x04658469, 0x045270d0,
     46     0x04400000, 0x042e29f7, 0x041ce739, 0x040c30c3, 0x03fc0000, 0x03ec4ec4,
     47     0x03dd1745, 0x03ce540f, 0x03c00000, 0x03b21642, 0x03a49249, 0x03976fc6,
     48     0x038aaaaa, 0x037e3f1f, 0x03722983, 0x03666666, 0x035af286, 0x034fcace,
     49     0x0344ec4e, 0x033a5440, 0x03300000, 0x0325ed09, 0x031c18f9, 0x0312818a,
     50     0x03092492, 0x03000000, 0x02f711dc, 0x02ee5846, 0x02e5d174, 0x02dd7baf,
     51     0x02d55555, 0x02cd5cd5, 0x02c590b2, 0x02bdef7b, 0x02b677d4, 0x02af286b,
     52     0x02a80000, 0x02a0fd5c, 0x029a1f58, 0x029364d9, 0x028ccccc, 0x0286562d,
     53     0x02800000, 0x0279c952, 0x0273b13b, 0x026db6db, 0x0267d95b, 0x026217ec,
     54     0x025c71c7, 0x0256e62a, 0x0251745d, 0x024c1bac, 0x0246db6d, 0x0241b2f9,
     55     0x023ca1af, 0x0237a6f4, 0x0232c234, 0x022df2df, 0x02293868, 0x02249249,
     56     0x02200000, 0x021b810e, 0x021714fb, 0x0212bb51, 0x020e739c, 0x020a3d70,
     57     0x02061861, 0x02020408, 0x01fe0000, 0x01fa0be8, 0x01f62762, 0x01f25213,
     58     0x01ee8ba2, 0x01ead3ba, 0x01e72a07, 0x01e38e38, 0x01e00000, 0x01dc7f10,
     59     0x01d90b21, 0x01d5a3e9, 0x01d24924, 0x01cefa8d, 0x01cbb7e3, 0x01c880e5,
     60     0x01c55555, 0x01c234f7, 0x01bf1f8f, 0x01bc14e5, 0x01b914c1, 0x01b61eed,
     61     0x01b33333, 0x01b05160, 0x01ad7943, 0x01aaaaaa, 0x01a7e567, 0x01a5294a,
     62     0x01a27627, 0x019fcbd2, 0x019d2a20, 0x019a90e7, 0x01980000, 0x01957741,
     63     0x0192f684, 0x01907da4, 0x018e0c7c, 0x018ba2e8, 0x018940c5, 0x0186e5f0,
     64     0x01849249, 0x018245ae, 0x01800000, 0x017dc11f, 0x017b88ee, 0x0179574e,
     65     0x01772c23, 0x01750750, 0x0172e8ba, 0x0170d045, 0x016ebdd7, 0x016cb157,
     66     0x016aaaaa, 0x0168a9b9, 0x0166ae6a, 0x0164b8a7, 0x0162c859, 0x0160dd67,
     67     0x015ef7bd, 0x015d1745, 0x015b3bea, 0x01596596, 0x01579435, 0x0155c7b4,
     68     0x01540000, 0x01523d03, 0x01507eae, 0x014ec4ec, 0x014d0fac, 0x014b5edc,
     69     0x0149b26c, 0x01480a4a, 0x01466666, 0x0144c6af, 0x01432b16, 0x0141938b,
     70     0x01400000, 0x013e7063, 0x013ce4a9, 0x013b5cc0, 0x0139d89d, 0x01385830,
     71     0x0136db6d, 0x01356246, 0x0133ecad, 0x01327a97, 0x01310bf6, 0x012fa0be,
     72     0x012e38e3, 0x012cd459, 0x012b7315, 0x012a150a, 0x0128ba2e, 0x01276276,
     73     0x01260dd6, 0x0124bc44, 0x01236db6, 0x01222222, 0x0120d97c, 0x011f93bc,
     74     0x011e50d7, 0x011d10c4, 0x011bd37a, 0x011a98ef, 0x0119611a, 0x01182bf2,
     75     0x0116f96f, 0x0115c988, 0x01149c34, 0x0113716a, 0x01124924, 0x01112358,
     76     0x01100000, 0x010edf12, 0x010dc087, 0x010ca458, 0x010b8a7d, 0x010a72f0,
     77     0x01095da8, 0x01084a9f, 0x010739ce, 0x01062b2e, 0x01051eb8, 0x01041465,
     78     0x01030c30, 0x01020612, 0x01010204, 0x01000000 },
     79   {   // alpha * KINV_255
     80     0x00000000, 0x00010101, 0x00020202, 0x00030303, 0x00040404, 0x00050505,
     81     0x00060606, 0x00070707, 0x00080808, 0x00090909, 0x000a0a0a, 0x000b0b0b,
     82     0x000c0c0c, 0x000d0d0d, 0x000e0e0e, 0x000f0f0f, 0x00101010, 0x00111111,
     83     0x00121212, 0x00131313, 0x00141414, 0x00151515, 0x00161616, 0x00171717,
     84     0x00181818, 0x00191919, 0x001a1a1a, 0x001b1b1b, 0x001c1c1c, 0x001d1d1d,
     85     0x001e1e1e, 0x001f1f1f, 0x00202020, 0x00212121, 0x00222222, 0x00232323,
     86     0x00242424, 0x00252525, 0x00262626, 0x00272727, 0x00282828, 0x00292929,
     87     0x002a2a2a, 0x002b2b2b, 0x002c2c2c, 0x002d2d2d, 0x002e2e2e, 0x002f2f2f,
     88     0x00303030, 0x00313131, 0x00323232, 0x00333333, 0x00343434, 0x00353535,
     89     0x00363636, 0x00373737, 0x00383838, 0x00393939, 0x003a3a3a, 0x003b3b3b,
     90     0x003c3c3c, 0x003d3d3d, 0x003e3e3e, 0x003f3f3f, 0x00404040, 0x00414141,
     91     0x00424242, 0x00434343, 0x00444444, 0x00454545, 0x00464646, 0x00474747,
     92     0x00484848, 0x00494949, 0x004a4a4a, 0x004b4b4b, 0x004c4c4c, 0x004d4d4d,
     93     0x004e4e4e, 0x004f4f4f, 0x00505050, 0x00515151, 0x00525252, 0x00535353,
     94     0x00545454, 0x00555555, 0x00565656, 0x00575757, 0x00585858, 0x00595959,
     95     0x005a5a5a, 0x005b5b5b, 0x005c5c5c, 0x005d5d5d, 0x005e5e5e, 0x005f5f5f,
     96     0x00606060, 0x00616161, 0x00626262, 0x00636363, 0x00646464, 0x00656565,
     97     0x00666666, 0x00676767, 0x00686868, 0x00696969, 0x006a6a6a, 0x006b6b6b,
     98     0x006c6c6c, 0x006d6d6d, 0x006e6e6e, 0x006f6f6f, 0x00707070, 0x00717171,
     99     0x00727272, 0x00737373, 0x00747474, 0x00757575, 0x00767676, 0x00777777,
    100     0x00787878, 0x00797979, 0x007a7a7a, 0x007b7b7b, 0x007c7c7c, 0x007d7d7d,
    101     0x007e7e7e, 0x007f7f7f, 0x00808080, 0x00818181, 0x00828282, 0x00838383,
    102     0x00848484, 0x00858585, 0x00868686, 0x00878787, 0x00888888, 0x00898989,
    103     0x008a8a8a, 0x008b8b8b, 0x008c8c8c, 0x008d8d8d, 0x008e8e8e, 0x008f8f8f,
    104     0x00909090, 0x00919191, 0x00929292, 0x00939393, 0x00949494, 0x00959595,
    105     0x00969696, 0x00979797, 0x00989898, 0x00999999, 0x009a9a9a, 0x009b9b9b,
    106     0x009c9c9c, 0x009d9d9d, 0x009e9e9e, 0x009f9f9f, 0x00a0a0a0, 0x00a1a1a1,
    107     0x00a2a2a2, 0x00a3a3a3, 0x00a4a4a4, 0x00a5a5a5, 0x00a6a6a6, 0x00a7a7a7,
    108     0x00a8a8a8, 0x00a9a9a9, 0x00aaaaaa, 0x00ababab, 0x00acacac, 0x00adadad,
    109     0x00aeaeae, 0x00afafaf, 0x00b0b0b0, 0x00b1b1b1, 0x00b2b2b2, 0x00b3b3b3,
    110     0x00b4b4b4, 0x00b5b5b5, 0x00b6b6b6, 0x00b7b7b7, 0x00b8b8b8, 0x00b9b9b9,
    111     0x00bababa, 0x00bbbbbb, 0x00bcbcbc, 0x00bdbdbd, 0x00bebebe, 0x00bfbfbf,
    112     0x00c0c0c0, 0x00c1c1c1, 0x00c2c2c2, 0x00c3c3c3, 0x00c4c4c4, 0x00c5c5c5,
    113     0x00c6c6c6, 0x00c7c7c7, 0x00c8c8c8, 0x00c9c9c9, 0x00cacaca, 0x00cbcbcb,
    114     0x00cccccc, 0x00cdcdcd, 0x00cecece, 0x00cfcfcf, 0x00d0d0d0, 0x00d1d1d1,
    115     0x00d2d2d2, 0x00d3d3d3, 0x00d4d4d4, 0x00d5d5d5, 0x00d6d6d6, 0x00d7d7d7,
    116     0x00d8d8d8, 0x00d9d9d9, 0x00dadada, 0x00dbdbdb, 0x00dcdcdc, 0x00dddddd,
    117     0x00dedede, 0x00dfdfdf, 0x00e0e0e0, 0x00e1e1e1, 0x00e2e2e2, 0x00e3e3e3,
    118     0x00e4e4e4, 0x00e5e5e5, 0x00e6e6e6, 0x00e7e7e7, 0x00e8e8e8, 0x00e9e9e9,
    119     0x00eaeaea, 0x00ebebeb, 0x00ececec, 0x00ededed, 0x00eeeeee, 0x00efefef,
    120     0x00f0f0f0, 0x00f1f1f1, 0x00f2f2f2, 0x00f3f3f3, 0x00f4f4f4, 0x00f5f5f5,
    121     0x00f6f6f6, 0x00f7f7f7, 0x00f8f8f8, 0x00f9f9f9, 0x00fafafa, 0x00fbfbfb,
    122     0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff }
    123 };
    124 
    125 static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
    126   return kMultTables[!inverse][a];
    127 }
    128 
    129 #else
    130 
    131 static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
    132   return inverse ? (255u << MFIX) / a : a * KINV_255;
    133 }
    134 
    135 #endif    // USE_TABLES_FOR_ALPHA_MULT
    136 
    137 static void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
    138   int x;
    139   for (x = 0; x < width; ++x) {
    140     const uint32_t argb = ptr[x];
    141     if (argb < 0xff000000u) {      // alpha < 255
    142       if (argb <= 0x00ffffffu) {   // alpha == 0
    143         ptr[x] = 0;
    144       } else {
    145         const uint32_t alpha = (argb >> 24) & 0xff;
    146         const uint32_t scale = GetScale(alpha, inverse);
    147         uint32_t out = argb & 0xff000000u;
    148         out |= Mult(argb >>  0, scale) <<  0;
    149         out |= Mult(argb >>  8, scale) <<  8;
    150         out |= Mult(argb >> 16, scale) << 16;
    151         ptr[x] = out;
    152       }
    153     }
    154   }
    155 }
    156 
    157 static void MultRow(uint8_t* const ptr, const uint8_t* const alpha,
    158                     int width, int inverse) {
    159   int x;
    160   for (x = 0; x < width; ++x) {
    161     const uint32_t a = alpha[x];
    162     if (a != 255) {
    163       if (a == 0) {
    164         ptr[x] = 0;
    165       } else {
    166         const uint32_t scale = GetScale(a, inverse);
    167         ptr[x] = Mult(ptr[x], scale);
    168       }
    169     }
    170   }
    171 }
    172 
    173 #undef KINV_255
    174 #undef HALF
    175 #undef MFIX
    176 
    177 void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
    178 void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha,
    179                     int width, int inverse);
    180 
    181 //------------------------------------------------------------------------------
    182 // Generic per-plane calls
    183 
    184 void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
    185                       int inverse) {
    186   int n;
    187   for (n = 0; n < num_rows; ++n) {
    188     WebPMultARGBRow((uint32_t*)ptr, width, inverse);
    189     ptr += stride;
    190   }
    191 }
    192 
    193 void WebPMultRows(uint8_t* ptr, int stride,
    194                   const uint8_t* alpha, int alpha_stride,
    195                   int width, int num_rows, int inverse) {
    196   int n;
    197   for (n = 0; n < num_rows; ++n) {
    198     WebPMultRow(ptr, alpha, width, inverse);
    199     ptr += stride;
    200     alpha += alpha_stride;
    201   }
    202 }
    203 
    204 //------------------------------------------------------------------------------
    205 // Premultiplied modes
    206 
// non-dithered modes
    208 
    209 // (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.)
    210 // for all 8bit x or a. For bit-wise equivalence to (int)(x * a / 255. + .5),
    211 // one can use instead: (x * a * 65793 + (1 << 23)) >> 24
    212 #if 1     // (int)(x * a / 255.)
    213 #define MULTIPLIER(a)   ((a) * 32897U)
    214 #define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
    215 #else     // (int)(x * a / 255. + .5)
    216 #define MULTIPLIER(a) ((a) * 65793U)
    217 #define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
    218 #endif
    219 
    220 static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
    221                                int w, int h, int stride) {
    222   while (h-- > 0) {
    223     uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
    224     const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
    225     int i;
    226     for (i = 0; i < w; ++i) {
    227       const uint32_t a = alpha[4 * i];
    228       if (a != 0xff) {
    229         const uint32_t mult = MULTIPLIER(a);
    230         rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult);
    231         rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult);
    232         rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult);
    233       }
    234     }
    235     rgba += stride;
    236   }
    237 }
    238 #undef MULTIPLIER
    239 #undef PREMULTIPLY
    240 
    241 // rgbA4444
    242 
    243 #define MULTIPLIER(a)  ((a) * 0x1111)    // 0x1111 ~= (1 << 16) / 15
    244 
    245 static WEBP_INLINE uint8_t dither_hi(uint8_t x) {
    246   return (x & 0xf0) | (x >> 4);
    247 }
    248 
    249 static WEBP_INLINE uint8_t dither_lo(uint8_t x) {
    250   return (x & 0x0f) | (x << 4);
    251 }
    252 
    253 static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
    254   return (x * m) >> 16;
    255 }
    256 
    257 static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444,
    258                                                int w, int h, int stride,
    259                                                int rg_byte_pos /* 0 or 1 */) {
    260   while (h-- > 0) {
    261     int i;
    262     for (i = 0; i < w; ++i) {
    263       const uint32_t rg = rgba4444[2 * i + rg_byte_pos];
    264       const uint32_t ba = rgba4444[2 * i + (rg_byte_pos ^ 1)];
    265       const uint8_t a = ba & 0x0f;
    266       const uint32_t mult = MULTIPLIER(a);
    267       const uint8_t r = multiply(dither_hi(rg), mult);
    268       const uint8_t g = multiply(dither_lo(rg), mult);
    269       const uint8_t b = multiply(dither_hi(ba), mult);
    270       rgba4444[2 * i + rg_byte_pos] = (r & 0xf0) | ((g >> 4) & 0x0f);
    271       rgba4444[2 * i + (rg_byte_pos ^ 1)] = (b & 0xf0) | a;
    272     }
    273     rgba4444 += stride;
    274   }
    275 }
    276 #undef MULTIPLIER
    277 
    278 static void ApplyAlphaMultiply_16b(uint8_t* rgba4444,
    279                                    int w, int h, int stride) {
    280 #ifdef WEBP_SWAP_16BIT_CSP
    281   ApplyAlphaMultiply4444(rgba4444, w, h, stride, 1);
    282 #else
    283   ApplyAlphaMultiply4444(rgba4444, w, h, stride, 0);
    284 #endif
    285 }
    286 
    287 static int ExtractAlpha(const uint8_t* argb, int argb_stride,
    288                         int width, int height,
    289                         uint8_t* alpha, int alpha_stride) {
    290   uint8_t alpha_mask = 0xff;
    291   int i, j;
    292 
    293   for (j = 0; j < height; ++j) {
    294     for (i = 0; i < width; ++i) {
    295       const uint8_t alpha_value = argb[4 * i];
    296       alpha[i] = alpha_value;
    297       alpha_mask &= alpha_value;
    298     }
    299     argb += argb_stride;
    300     alpha += alpha_stride;
    301   }
    302   return (alpha_mask == 0xff);
    303 }
    304 
    305 void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
    306 void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
    307 int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
    308 
    309 //------------------------------------------------------------------------------
    310 // Init function
    311 
    312 extern void WebPInitAlphaProcessingSSE2(void);
    313 
    314 static volatile VP8CPUInfo alpha_processing_last_cpuinfo_used =
    315     (VP8CPUInfo)&alpha_processing_last_cpuinfo_used;
    316 
    317 void WebPInitAlphaProcessing(void) {
    318   if (alpha_processing_last_cpuinfo_used == VP8GetCPUInfo) return;
    319 
    320   WebPMultARGBRow = MultARGBRow;
    321   WebPMultRow = MultRow;
    322   WebPApplyAlphaMultiply = ApplyAlphaMultiply;
    323   WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;
    324   WebPExtractAlpha = ExtractAlpha;
    325 
    326   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
    327   if (VP8GetCPUInfo != NULL) {
    328 #if defined(WEBP_USE_SSE2)
    329     if (VP8GetCPUInfo(kSSE2)) {
    330       WebPInitAlphaProcessingSSE2();
    331     }
    332 #endif
    333   }
    334   alpha_processing_last_cpuinfo_used = VP8GetCPUInfo;
    335 }
    336