// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
//  Software License Agreement:  http://www.webmproject.org/license/software/
//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
// YUV to RGB upsampling functions.
//
// Author: somnath (at) google.com (Somnath Banerjee)

#include "./dsp.h"
#include "./yuv.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

//------------------------------------------------------------------------------
// Fancy upsampler

#ifdef FANCY_UPSAMPLING

// Fancy upsampling functions to convert YUV to RGB.
// One entry per output colorspace mode; the entries are filled in at runtime
// by WebPInitUpsamplers() and WebPInitPremultiply().
WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST];

// Given samples laid out in a square as:
//  [a b]
//  [c d]
// we interpolate u/v as:
//  ([9*a + 3*b + 3*c +   d    3*a + 9*b +   c + 3*d] + [8 8]) / 16
//  ([3*a +   b + 9*c + 3*d      a + 3*b + 3*c + 9*d] + [8 8]) / 16
// i.e. each output pixel weighs its nearest sample by 9, the two adjacent
// samples by 3 and the diagonally opposite sample by 1.

// We process u and v together stashed into 32bit (16bit each).
35 #define LOAD_UV(u,v) ((u) | ((v) << 16)) 36 37 #define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \ 38 static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ 39 const uint8_t* top_u, const uint8_t* top_v, \ 40 const uint8_t* cur_u, const uint8_t* cur_v, \ 41 uint8_t* top_dst, uint8_t* bottom_dst, int len) { \ 42 int x; \ 43 const int last_pixel_pair = (len - 1) >> 1; \ 44 uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]); /* top-left sample */ \ 45 uint32_t l_uv = LOAD_UV(cur_u[0], cur_v[0]); /* left-sample */ \ 46 if (top_y) { \ 47 const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \ 48 FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst); \ 49 } \ 50 if (bottom_y) { \ 51 const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2; \ 52 FUNC(bottom_y[0], uv0 & 0xff, (uv0 >> 16), bottom_dst); \ 53 } \ 54 for (x = 1; x <= last_pixel_pair; ++x) { \ 55 const uint32_t t_uv = LOAD_UV(top_u[x], top_v[x]); /* top sample */ \ 56 const uint32_t uv = LOAD_UV(cur_u[x], cur_v[x]); /* sample */ \ 57 /* precompute invariant values associated with first and second diagonals*/\ 58 const uint32_t avg = tl_uv + t_uv + l_uv + uv + 0x00080008u; \ 59 const uint32_t diag_12 = (avg + 2 * (t_uv + l_uv)) >> 3; \ 60 const uint32_t diag_03 = (avg + 2 * (tl_uv + uv)) >> 3; \ 61 if (top_y) { \ 62 const uint32_t uv0 = (diag_12 + tl_uv) >> 1; \ 63 const uint32_t uv1 = (diag_03 + t_uv) >> 1; \ 64 FUNC(top_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), \ 65 top_dst + (2 * x - 1) * XSTEP); \ 66 FUNC(top_y[2 * x - 0], uv1 & 0xff, (uv1 >> 16), \ 67 top_dst + (2 * x - 0) * XSTEP); \ 68 } \ 69 if (bottom_y) { \ 70 const uint32_t uv0 = (diag_03 + l_uv) >> 1; \ 71 const uint32_t uv1 = (diag_12 + uv) >> 1; \ 72 FUNC(bottom_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), \ 73 bottom_dst + (2 * x - 1) * XSTEP); \ 74 FUNC(bottom_y[2 * x + 0], uv1 & 0xff, (uv1 >> 16), \ 75 bottom_dst + (2 * x + 0) * XSTEP); \ 76 } \ 77 tl_uv = t_uv; \ 78 l_uv = uv; \ 79 } \ 80 if (!(len & 1)) { \ 81 if (top_y) { \ 82 const 
uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \ 83 FUNC(top_y[len - 1], uv0 & 0xff, (uv0 >> 16), \ 84 top_dst + (len - 1) * XSTEP); \ 85 } \ 86 if (bottom_y) { \ 87 const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2; \ 88 FUNC(bottom_y[len - 1], uv0 & 0xff, (uv0 >> 16), \ 89 bottom_dst + (len - 1) * XSTEP); \ 90 } \ 91 } \ 92 } 93 94 // All variants implemented. 95 UPSAMPLE_FUNC(UpsampleRgbLinePair, VP8YuvToRgb, 3) 96 UPSAMPLE_FUNC(UpsampleBgrLinePair, VP8YuvToBgr, 3) 97 UPSAMPLE_FUNC(UpsampleRgbaLinePair, VP8YuvToRgba, 4) 98 UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4) 99 UPSAMPLE_FUNC(UpsampleArgbLinePair, VP8YuvToArgb, 4) 100 UPSAMPLE_FUNC(UpsampleRgba4444LinePair, VP8YuvToRgba4444, 2) 101 UPSAMPLE_FUNC(UpsampleRgb565LinePair, VP8YuvToRgb565, 2) 102 103 #undef LOAD_UV 104 #undef UPSAMPLE_FUNC 105 106 #endif // FANCY_UPSAMPLING 107 108 //------------------------------------------------------------------------------ 109 // simple point-sampling 110 111 #define SAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \ 112 static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ 113 const uint8_t* u, const uint8_t* v, \ 114 uint8_t* top_dst, uint8_t* bottom_dst, int len) { \ 115 int i; \ 116 for (i = 0; i < len - 1; i += 2) { \ 117 FUNC(top_y[0], u[0], v[0], top_dst); \ 118 FUNC(top_y[1], u[0], v[0], top_dst + XSTEP); \ 119 FUNC(bottom_y[0], u[0], v[0], bottom_dst); \ 120 FUNC(bottom_y[1], u[0], v[0], bottom_dst + XSTEP); \ 121 top_y += 2; \ 122 bottom_y += 2; \ 123 u++; \ 124 v++; \ 125 top_dst += 2 * XSTEP; \ 126 bottom_dst += 2 * XSTEP; \ 127 } \ 128 if (i == len - 1) { /* last one */ \ 129 FUNC(top_y[0], u[0], v[0], top_dst); \ 130 FUNC(bottom_y[0], u[0], v[0], bottom_dst); \ 131 } \ 132 } 133 134 // All variants implemented. 
135 SAMPLE_FUNC(SampleRgbLinePair, VP8YuvToRgb, 3) 136 SAMPLE_FUNC(SampleBgrLinePair, VP8YuvToBgr, 3) 137 SAMPLE_FUNC(SampleRgbaLinePair, VP8YuvToRgba, 4) 138 SAMPLE_FUNC(SampleBgraLinePair, VP8YuvToBgra, 4) 139 SAMPLE_FUNC(SampleArgbLinePair, VP8YuvToArgb, 4) 140 SAMPLE_FUNC(SampleRgba4444LinePair, VP8YuvToRgba4444, 2) 141 SAMPLE_FUNC(SampleRgb565LinePair, VP8YuvToRgb565, 2) 142 143 #undef SAMPLE_FUNC 144 145 const WebPSampleLinePairFunc WebPSamplers[MODE_LAST] = { 146 SampleRgbLinePair, // MODE_RGB 147 SampleRgbaLinePair, // MODE_RGBA 148 SampleBgrLinePair, // MODE_BGR 149 SampleBgraLinePair, // MODE_BGRA 150 SampleArgbLinePair, // MODE_ARGB 151 SampleRgba4444LinePair, // MODE_RGBA_4444 152 SampleRgb565LinePair, // MODE_RGB_565 153 SampleRgbaLinePair, // MODE_rgbA 154 SampleBgraLinePair, // MODE_bgrA 155 SampleArgbLinePair, // MODE_Argb 156 SampleRgba4444LinePair // MODE_rgbA_4444 157 }; 158 159 //------------------------------------------------------------------------------ 160 161 #if !defined(FANCY_UPSAMPLING) 162 #define DUAL_SAMPLE_FUNC(FUNC_NAME, FUNC) \ 163 static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y, \ 164 const uint8_t* top_u, const uint8_t* top_v, \ 165 const uint8_t* bot_u, const uint8_t* bot_v, \ 166 uint8_t* top_dst, uint8_t* bot_dst, int len) { \ 167 const int half_len = len >> 1; \ 168 int x; \ 169 if (top_dst != NULL) { \ 170 for (x = 0; x < half_len; ++x) { \ 171 FUNC(top_y[2 * x + 0], top_u[x], top_v[x], top_dst + 8 * x + 0); \ 172 FUNC(top_y[2 * x + 1], top_u[x], top_v[x], top_dst + 8 * x + 4); \ 173 } \ 174 if (len & 1) FUNC(top_y[2 * x + 0], top_u[x], top_v[x], top_dst + 8 * x); \ 175 } \ 176 if (bot_dst != NULL) { \ 177 for (x = 0; x < half_len; ++x) { \ 178 FUNC(bot_y[2 * x + 0], bot_u[x], bot_v[x], bot_dst + 8 * x + 0); \ 179 FUNC(bot_y[2 * x + 1], bot_u[x], bot_v[x], bot_dst + 8 * x + 4); \ 180 } \ 181 if (len & 1) FUNC(bot_y[2 * x + 0], bot_u[x], bot_v[x], bot_dst + 8 * x); \ 182 } \ 183 } 184 185 
DUAL_SAMPLE_FUNC(DualLineSamplerBGRA, VP8YuvToBgra) 186 DUAL_SAMPLE_FUNC(DualLineSamplerARGB, VP8YuvToArgb) 187 #undef DUAL_SAMPLE_FUNC 188 189 #endif // !FANCY_UPSAMPLING 190 191 WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) { 192 WebPInitUpsamplers(); 193 VP8YUVInit(); 194 #ifdef FANCY_UPSAMPLING 195 return WebPUpsamplers[alpha_is_last ? MODE_BGRA : MODE_ARGB]; 196 #else 197 return (alpha_is_last ? DualLineSamplerBGRA : DualLineSamplerARGB); 198 #endif 199 } 200 201 //------------------------------------------------------------------------------ 202 // YUV444 converter 203 204 #define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP) \ 205 static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \ 206 uint8_t* dst, int len) { \ 207 int i; \ 208 for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]); \ 209 } 210 211 YUV444_FUNC(Yuv444ToRgb, VP8YuvToRgb, 3) 212 YUV444_FUNC(Yuv444ToBgr, VP8YuvToBgr, 3) 213 YUV444_FUNC(Yuv444ToRgba, VP8YuvToRgba, 4) 214 YUV444_FUNC(Yuv444ToBgra, VP8YuvToBgra, 4) 215 YUV444_FUNC(Yuv444ToArgb, VP8YuvToArgb, 4) 216 YUV444_FUNC(Yuv444ToRgba4444, VP8YuvToRgba4444, 2) 217 YUV444_FUNC(Yuv444ToRgb565, VP8YuvToRgb565, 2) 218 219 #undef YUV444_FUNC 220 221 const WebPYUV444Converter WebPYUV444Converters[MODE_LAST] = { 222 Yuv444ToRgb, // MODE_RGB 223 Yuv444ToRgba, // MODE_RGBA 224 Yuv444ToBgr, // MODE_BGR 225 Yuv444ToBgra, // MODE_BGRA 226 Yuv444ToArgb, // MODE_ARGB 227 Yuv444ToRgba4444, // MODE_RGBA_4444 228 Yuv444ToRgb565, // MODE_RGB_565 229 Yuv444ToRgba, // MODE_rgbA 230 Yuv444ToBgra, // MODE_bgrA 231 Yuv444ToArgb, // MODE_Argb 232 Yuv444ToRgba4444 // MODE_rgbA_4444 233 }; 234 235 //------------------------------------------------------------------------------ 236 // Premultiplied modes 237 238 // non dithered-modes 239 240 // (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.) 241 // for all 8bit x or a. For bit-wise equivalence to (int)(x * a / 255. 
+ .5), 242 // one can use instead: (x * a * 65793 + (1 << 23)) >> 24 243 #if 1 // (int)(x * a / 255.) 244 #define MULTIPLIER(a) ((a) * 32897UL) 245 #define PREMULTIPLY(x, m) (((x) * (m)) >> 23) 246 #else // (int)(x * a / 255. + .5) 247 #define MULTIPLIER(a) ((a) * 65793UL) 248 #define PREMULTIPLY(x, m) (((x) * (m) + (1UL << 23)) >> 24) 249 #endif 250 251 static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first, 252 int w, int h, int stride) { 253 while (h-- > 0) { 254 uint8_t* const rgb = rgba + (alpha_first ? 1 : 0); 255 const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3); 256 int i; 257 for (i = 0; i < w; ++i) { 258 const uint32_t a = alpha[4 * i]; 259 if (a != 0xff) { 260 const uint32_t mult = MULTIPLIER(a); 261 rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult); 262 rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult); 263 rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult); 264 } 265 } 266 rgba += stride; 267 } 268 } 269 #undef MULTIPLIER 270 #undef PREMULTIPLY 271 272 // rgbA4444 273 274 #define MULTIPLIER(a) ((a) * 0x1111) // 0x1111 ~= (1 << 16) / 15 275 276 static WEBP_INLINE uint8_t dither_hi(uint8_t x) { 277 return (x & 0xf0) | (x >> 4); 278 } 279 280 static WEBP_INLINE uint8_t dither_lo(uint8_t x) { 281 return (x & 0x0f) | (x << 4); 282 } 283 284 static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) { 285 return (x * m) >> 16; 286 } 287 288 static void ApplyAlphaMultiply4444(uint8_t* rgba4444, 289 int w, int h, int stride) { 290 while (h-- > 0) { 291 int i; 292 for (i = 0; i < w; ++i) { 293 const uint8_t a = (rgba4444[2 * i + 1] & 0x0f); 294 const uint32_t mult = MULTIPLIER(a); 295 const uint8_t r = multiply(dither_hi(rgba4444[2 * i + 0]), mult); 296 const uint8_t g = multiply(dither_lo(rgba4444[2 * i + 0]), mult); 297 const uint8_t b = multiply(dither_hi(rgba4444[2 * i + 1]), mult); 298 rgba4444[2 * i + 0] = (r & 0xf0) | ((g >> 4) & 0x0f); 299 rgba4444[2 * i + 1] = (b & 0xf0) | a; 300 } 301 rgba4444 += stride; 302 } 303 } 304 #undef 
MULTIPLIER 305 306 void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int) 307 = ApplyAlphaMultiply; 308 void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int) 309 = ApplyAlphaMultiply4444; 310 311 //------------------------------------------------------------------------------ 312 // Main call 313 314 void WebPInitUpsamplers(void) { 315 #ifdef FANCY_UPSAMPLING 316 WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair; 317 WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair; 318 WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair; 319 WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair; 320 WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair; 321 WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair; 322 WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair; 323 324 // If defined, use CPUInfo() to overwrite some pointers with faster versions. 325 if (VP8GetCPUInfo != NULL) { 326 #if defined(WEBP_USE_SSE2) 327 if (VP8GetCPUInfo(kSSE2)) { 328 WebPInitUpsamplersSSE2(); 329 } 330 #endif 331 } 332 #endif // FANCY_UPSAMPLING 333 } 334 335 void WebPInitPremultiply(void) { 336 WebPApplyAlphaMultiply = ApplyAlphaMultiply; 337 WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply4444; 338 339 #ifdef FANCY_UPSAMPLING 340 WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair; 341 WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair; 342 WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair; 343 WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair; 344 345 if (VP8GetCPUInfo != NULL) { 346 #if defined(WEBP_USE_SSE2) 347 if (VP8GetCPUInfo(kSSE2)) { 348 WebPInitPremultiplySSE2(); 349 } 350 #endif 351 } 352 #endif // FANCY_UPSAMPLING 353 } 354 355 #if defined(__cplusplus) || defined(c_plusplus) 356 } // extern "C" 357 #endif 358