1 // Copyright 2010 Google Inc. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the COPYING file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 // ----------------------------------------------------------------------------- 9 // 10 // YUV->RGB conversion functions 11 // 12 // Author: Skal (pascal.massimino (at) gmail.com) 13 14 #include "src/dsp/yuv.h" 15 16 #include <assert.h> 17 #include <stdlib.h> 18 19 //----------------------------------------------------------------------------- 20 // Plain-C version 21 22 #define ROW_FUNC(FUNC_NAME, FUNC, XSTEP) \ 23 static void FUNC_NAME(const uint8_t* y, \ 24 const uint8_t* u, const uint8_t* v, \ 25 uint8_t* dst, int len) { \ 26 const uint8_t* const end = dst + (len & ~1) * (XSTEP); \ 27 while (dst != end) { \ 28 FUNC(y[0], u[0], v[0], dst); \ 29 FUNC(y[1], u[0], v[0], dst + (XSTEP)); \ 30 y += 2; \ 31 ++u; \ 32 ++v; \ 33 dst += 2 * (XSTEP); \ 34 } \ 35 if (len & 1) { \ 36 FUNC(y[0], u[0], v[0], dst); \ 37 } \ 38 } \ 39 40 // All variants implemented. 41 ROW_FUNC(YuvToRgbRow, VP8YuvToRgb, 3) 42 ROW_FUNC(YuvToBgrRow, VP8YuvToBgr, 3) 43 ROW_FUNC(YuvToRgbaRow, VP8YuvToRgba, 4) 44 ROW_FUNC(YuvToBgraRow, VP8YuvToBgra, 4) 45 ROW_FUNC(YuvToArgbRow, VP8YuvToArgb, 4) 46 ROW_FUNC(YuvToRgba4444Row, VP8YuvToRgba4444, 2) 47 ROW_FUNC(YuvToRgb565Row, VP8YuvToRgb565, 2) 48 49 #undef ROW_FUNC 50 51 // Main call for processing a plane with a WebPSamplerRowFunc function: 52 void WebPSamplerProcessPlane(const uint8_t* y, int y_stride, 53 const uint8_t* u, const uint8_t* v, int uv_stride, 54 uint8_t* dst, int dst_stride, 55 int width, int height, WebPSamplerRowFunc func) { 56 int j; 57 for (j = 0; j < height; ++j) { 58 func(y, u, v, dst, width); 59 y += y_stride; 60 if (j & 1) { 61 u += uv_stride; 62 v += uv_stride; 63 } 64 dst += dst_stride; 65 } 66 } 67 68 //----------------------------------------------------------------------------- 69 // Main call 70 71 WebPSamplerRowFunc WebPSamplers[MODE_LAST]; 72 73 extern void WebPInitSamplersSSE2(void); 74 extern void WebPInitSamplersSSE41(void); 75 extern void WebPInitSamplersMIPS32(void); 76 extern void WebPInitSamplersMIPSdspR2(void); 77 78 WEBP_DSP_INIT_FUNC(WebPInitSamplers) { 79 WebPSamplers[MODE_RGB] = YuvToRgbRow; 80 WebPSamplers[MODE_RGBA] = YuvToRgbaRow; 81 WebPSamplers[MODE_BGR] = YuvToBgrRow; 82 WebPSamplers[MODE_BGRA] = YuvToBgraRow; 83 WebPSamplers[MODE_ARGB] = YuvToArgbRow; 84 WebPSamplers[MODE_RGBA_4444] = YuvToRgba4444Row; 85 WebPSamplers[MODE_RGB_565] = YuvToRgb565Row; 86 WebPSamplers[MODE_rgbA] = YuvToRgbaRow; 87 WebPSamplers[MODE_bgrA] = YuvToBgraRow; 88 WebPSamplers[MODE_Argb] = YuvToArgbRow; 89 WebPSamplers[MODE_rgbA_4444] = YuvToRgba4444Row; 90 91 // If defined, use CPUInfo() to overwrite some pointers with faster versions. 92 if (VP8GetCPUInfo != NULL) { 93 #if defined(WEBP_USE_SSE2) 94 if (VP8GetCPUInfo(kSSE2)) { 95 WebPInitSamplersSSE2(); 96 } 97 #endif // WEBP_USE_SSE2 98 #if defined(WEBP_USE_SSE41) 99 if (VP8GetCPUInfo(kSSE4_1)) { 100 WebPInitSamplersSSE41(); 101 } 102 #endif // WEBP_USE_SSE41 103 #if defined(WEBP_USE_MIPS32) 104 if (VP8GetCPUInfo(kMIPS32)) { 105 WebPInitSamplersMIPS32(); 106 } 107 #endif // WEBP_USE_MIPS32 108 #if defined(WEBP_USE_MIPS_DSP_R2) 109 if (VP8GetCPUInfo(kMIPSdspR2)) { 110 WebPInitSamplersMIPSdspR2(); 111 } 112 #endif // WEBP_USE_MIPS_DSP_R2 113 } 114 } 115 116 //----------------------------------------------------------------------------- 117 // ARGB -> YUV converters 118 119 static void ConvertARGBToY_C(const uint32_t* argb, uint8_t* y, int width) { 120 int i; 121 for (i = 0; i < width; ++i) { 122 const uint32_t p = argb[i]; 123 y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >> 0) & 0xff, 124 YUV_HALF); 125 } 126 } 127 128 void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v, 129 int src_width, int do_store) { 130 // No rounding. Last pixel is dealt with separately. 131 const int uv_width = src_width >> 1; 132 int i; 133 for (i = 0; i < uv_width; ++i) { 134 const uint32_t v0 = argb[2 * i + 0]; 135 const uint32_t v1 = argb[2 * i + 1]; 136 // VP8RGBToU/V expects four accumulated pixels. Hence we need to 137 // scale r/g/b value by a factor 2. We just shift v0/v1 one bit less. 138 const int r = ((v0 >> 15) & 0x1fe) + ((v1 >> 15) & 0x1fe); 139 const int g = ((v0 >> 7) & 0x1fe) + ((v1 >> 7) & 0x1fe); 140 const int b = ((v0 << 1) & 0x1fe) + ((v1 << 1) & 0x1fe); 141 const int tmp_u = VP8RGBToU(r, g, b, YUV_HALF << 2); 142 const int tmp_v = VP8RGBToV(r, g, b, YUV_HALF << 2); 143 if (do_store) { 144 u[i] = tmp_u; 145 v[i] = tmp_v; 146 } else { 147 // Approximated average-of-four. But it's an acceptable diff. 148 u[i] = (u[i] + tmp_u + 1) >> 1; 149 v[i] = (v[i] + tmp_v + 1) >> 1; 150 } 151 } 152 if (src_width & 1) { // last pixel 153 const uint32_t v0 = argb[2 * i + 0]; 154 const int r = (v0 >> 14) & 0x3fc; 155 const int g = (v0 >> 6) & 0x3fc; 156 const int b = (v0 << 2) & 0x3fc; 157 const int tmp_u = VP8RGBToU(r, g, b, YUV_HALF << 2); 158 const int tmp_v = VP8RGBToV(r, g, b, YUV_HALF << 2); 159 if (do_store) { 160 u[i] = tmp_u; 161 v[i] = tmp_v; 162 } else { 163 u[i] = (u[i] + tmp_u + 1) >> 1; 164 v[i] = (v[i] + tmp_v + 1) >> 1; 165 } 166 } 167 } 168 169 //----------------------------------------------------------------------------- 170 171 static void ConvertRGB24ToY_C(const uint8_t* rgb, uint8_t* y, int width) { 172 int i; 173 for (i = 0; i < width; ++i, rgb += 3) { 174 y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF); 175 } 176 } 177 178 static void ConvertBGR24ToY_C(const uint8_t* bgr, uint8_t* y, int width) { 179 int i; 180 for (i = 0; i < width; ++i, bgr += 3) { 181 y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF); 182 } 183 } 184 185 void WebPConvertRGBA32ToUV_C(const uint16_t* rgb, 186 uint8_t* u, uint8_t* v, int width) { 187 int i; 188 for (i = 0; i < width; i += 1, rgb += 4) { 189 const int r = rgb[0], g = rgb[1], b = rgb[2]; 190 u[i] = VP8RGBToU(r, g, b, YUV_HALF << 2); 191 v[i] = VP8RGBToV(r, g, b, YUV_HALF << 2); 192 } 193 } 194 195 //----------------------------------------------------------------------------- 196 197 #if !WEBP_NEON_OMIT_C_CODE 198 #define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic 199 static uint16_t clip_y(int v) { 200 return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v; 201 } 202 203 static uint64_t SharpYUVUpdateY_C(const uint16_t* ref, const uint16_t* src, 204 uint16_t* dst, int len) { 205 uint64_t diff = 0; 206 int i; 207 for (i = 0; i < len; ++i) { 208 const int diff_y = ref[i] - src[i]; 209 const int new_y = (int)dst[i] + diff_y; 210 dst[i] = clip_y(new_y); 211 diff += (uint64_t)abs(diff_y); 212 } 213 return diff; 214 } 215 216 static void SharpYUVUpdateRGB_C(const int16_t* ref, const int16_t* src, 217 int16_t* dst, int len) { 218 int i; 219 for (i = 0; i < len; ++i) { 220 const int diff_uv = ref[i] - src[i]; 221 dst[i] += diff_uv; 222 } 223 } 224 225 static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len, 226 const uint16_t* best_y, uint16_t* out) { 227 int i; 228 for (i = 0; i < len; ++i, ++A, ++B) { 229 const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4; 230 const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4; 231 out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0); 232 out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1); 233 } 234 } 235 #endif // !WEBP_NEON_OMIT_C_CODE 236 237 #undef MAX_Y 238 239 //----------------------------------------------------------------------------- 240 241 void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width); 242 void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width); 243 void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb, 244 uint8_t* u, uint8_t* v, int width); 245 246 void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width); 247 void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v, 248 int src_width, int do_store); 249 250 uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* ref, const uint16_t* src, 251 uint16_t* dst, int len); 252 void (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src, 253 int16_t* dst, int len); 254 void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len, 255 const uint16_t* best_y, uint16_t* out); 256 257 extern void WebPInitConvertARGBToYUVSSE2(void); 258 extern void WebPInitConvertARGBToYUVSSE41(void); 259 extern void WebPInitConvertARGBToYUVNEON(void); 260 extern void WebPInitSharpYUVSSE2(void); 261 extern void WebPInitSharpYUVNEON(void); 262 263 WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) { 264 WebPConvertARGBToY = ConvertARGBToY_C; 265 WebPConvertARGBToUV = WebPConvertARGBToUV_C; 266 267 WebPConvertRGB24ToY = ConvertRGB24ToY_C; 268 WebPConvertBGR24ToY = ConvertBGR24ToY_C; 269 270 WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C; 271 272 #if !WEBP_NEON_OMIT_C_CODE 273 WebPSharpYUVUpdateY = SharpYUVUpdateY_C; 274 WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C; 275 WebPSharpYUVFilterRow = SharpYUVFilterRow_C; 276 #endif 277 278 if (VP8GetCPUInfo != NULL) { 279 #if defined(WEBP_USE_SSE2) 280 if (VP8GetCPUInfo(kSSE2)) { 281 WebPInitConvertARGBToYUVSSE2(); 282 WebPInitSharpYUVSSE2(); 283 } 284 #endif // WEBP_USE_SSE2 285 #if defined(WEBP_USE_SSE41) 286 if (VP8GetCPUInfo(kSSE4_1)) { 287 WebPInitConvertARGBToYUVSSE41(); 288 } 289 #endif // WEBP_USE_SSE41 290 } 291 292 #if defined(WEBP_USE_NEON) 293 if (WEBP_NEON_OMIT_C_CODE || 294 (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { 295 WebPInitConvertARGBToYUVNEON(); 296 WebPInitSharpYUVNEON(); 297 } 298 #endif // WEBP_USE_NEON 299 300 assert(WebPConvertARGBToY != NULL); 301 assert(WebPConvertARGBToUV != NULL); 302 assert(WebPConvertRGB24ToY != NULL); 303 assert(WebPConvertBGR24ToY != NULL); 304 assert(WebPConvertRGBA32ToUV != NULL); 305 assert(WebPSharpYUVUpdateY != NULL); 306 assert(WebPSharpYUVUpdateRGB != NULL); 307 assert(WebPSharpYUVFilterRow != NULL); 308 } 309