1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "build/build_config.h" 6 #include "media/base/simd/convert_rgb_to_yuv.h" 7 #include "media/base/simd/yuv_to_rgb_table.h" 8 9 #if defined(COMPILER_MSVC) 10 #include <intrin.h> 11 #else 12 #include <mmintrin.h> 13 #include <emmintrin.h> 14 #endif 15 16 namespace media { 17 18 #define FIX_SHIFT 12 19 #define FIX(x) ((x) * (1 << FIX_SHIFT)) 20 21 // Define a convenient macro to do static cast. 22 #define INT16_FIX(x) static_cast<int16>(FIX(x)) 23 24 // Android's pixel layout is RGBA, while other platforms 25 // are BGRA. 26 #if defined(OS_ANDROID) 27 SIMD_ALIGNED(const int16 ConvertRGBAToYUV_kTable[8 * 3]) = { 28 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0, 29 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0, 30 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0, 31 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0, 32 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0, 33 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0, 34 }; 35 #else 36 SIMD_ALIGNED(const int16 ConvertRGBAToYUV_kTable[8 * 3]) = { 37 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, 38 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, 39 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, 40 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, 41 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, 42 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, 43 }; 44 #endif 45 46 #undef INT16_FIX 47 48 // This is the final offset for the conversion from signed yuv values to 49 // unsigned values. It is arranged so that offset of 16 is applied to Y 50 // components and 128 is added to UV components for 2 pixels. 51 SIMD_ALIGNED(const int32 kYOffset[4]) = {16, 16, 16, 16}; 52 53 static inline int Clamp(int value) { 54 if (value < 0) 55 return 0; 56 if (value > 255) 57 return 255; 58 return value; 59 } 60 61 static inline int RGBToY(int r, int g, int b) { 62 int y = ConvertRGBAToYUV_kTable[0] * b + 63 ConvertRGBAToYUV_kTable[1] * g + 64 ConvertRGBAToYUV_kTable[2] * r; 65 y >>= FIX_SHIFT; 66 return Clamp(y + 16); 67 } 68 69 static inline int RGBToU(int r, int g, int b, int shift) { 70 int u = ConvertRGBAToYUV_kTable[8] * b + 71 ConvertRGBAToYUV_kTable[9] * g + 72 ConvertRGBAToYUV_kTable[10] * r; 73 u >>= FIX_SHIFT + shift; 74 return Clamp(u + 128); 75 } 76 77 static inline int RGBToV(int r, int g, int b, int shift) { 78 int v = ConvertRGBAToYUV_kTable[16] * b + 79 ConvertRGBAToYUV_kTable[17] * g + 80 ConvertRGBAToYUV_kTable[18] * r; 81 v >>= FIX_SHIFT + shift; 82 return Clamp(v + 128); 83 } 84 85 #define CONVERT_Y(rgb_buf, y_buf) \ 86 b = *rgb_buf++; \ 87 g = *rgb_buf++; \ 88 r = *rgb_buf++; \ 89 ++rgb_buf; \ 90 sum_b += b; \ 91 sum_g += g; \ 92 sum_r += r; \ 93 *y_buf++ = RGBToY(r, g, b); 94 95 static inline void ConvertRGBToYUV_V2H2(const uint8* rgb_buf_1, 96 const uint8* rgb_buf_2, 97 uint8* y_buf_1, 98 uint8* y_buf_2, 99 uint8* u_buf, 100 uint8* v_buf) { 101 int sum_b = 0; 102 int sum_g = 0; 103 int sum_r = 0; 104 int r, g, b; 105 106 107 108 CONVERT_Y(rgb_buf_1, y_buf_1); 109 CONVERT_Y(rgb_buf_1, y_buf_1); 110 CONVERT_Y(rgb_buf_2, y_buf_2); 111 CONVERT_Y(rgb_buf_2, y_buf_2); 112 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 2); 113 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 2); 114 } 115 116 static inline void ConvertRGBToYUV_V2H1(const uint8* rgb_buf_1, 117 const uint8* rgb_buf_2, 118 uint8* y_buf_1, 119 uint8* y_buf_2, 120 uint8* u_buf, 121 uint8* v_buf) { 122 int sum_b = 0; 123 int sum_g = 0; 124 int sum_r = 0; 125 int r, g, b; 126 127 CONVERT_Y(rgb_buf_1, y_buf_1); 128 CONVERT_Y(rgb_buf_2, y_buf_2); 129 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); 130 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); 131 } 132 133 static inline void ConvertRGBToYUV_V1H2(const uint8* rgb_buf, 134 uint8* y_buf, 135 uint8* u_buf, 136 uint8* v_buf) { 137 int sum_b = 0; 138 int sum_g = 0; 139 int sum_r = 0; 140 int r, g, b; 141 142 CONVERT_Y(rgb_buf, y_buf); 143 CONVERT_Y(rgb_buf, y_buf); 144 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); 145 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); 146 } 147 148 static inline void ConvertRGBToYUV_V1H1(const uint8* rgb_buf, 149 uint8* y_buf, 150 uint8* u_buf, 151 uint8* v_buf) { 152 int sum_b = 0; 153 int sum_g = 0; 154 int sum_r = 0; 155 int r, g, b; 156 157 CONVERT_Y(rgb_buf, y_buf); 158 *u_buf++ = RGBToU(r, g, b, 0); 159 *v_buf++ = RGBToV(r, g, b, 0); 160 } 161 162 static void ConvertRGB32ToYUVRow_SSE2(const uint8* rgb_buf_1, 163 const uint8* rgb_buf_2, 164 uint8* y_buf_1, 165 uint8* y_buf_2, 166 uint8* u_buf, 167 uint8* v_buf, 168 int width) { 169 while (width >= 4) { 170 // Name for the Y pixels: 171 // Row 1: a b c d 172 // Row 2: e f g h 173 // 174 // First row 4 pixels. 175 __m128i rgb_row_1 = _mm_loadu_si128( 176 reinterpret_cast<const __m128i*>(rgb_buf_1)); 177 __m128i zero_1 = _mm_xor_si128(rgb_row_1, rgb_row_1); 178 179 __m128i y_table = _mm_load_si128( 180 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable)); 181 182 __m128i rgb_a_b = _mm_unpackhi_epi8(rgb_row_1, zero_1); 183 rgb_a_b = _mm_madd_epi16(rgb_a_b, y_table); 184 185 __m128i rgb_c_d = _mm_unpacklo_epi8(rgb_row_1, zero_1); 186 rgb_c_d = _mm_madd_epi16(rgb_c_d, y_table); 187 188 // Do a crazh shuffle so that we get: 189 // v------------ Multiply Add 190 // BG: a b c d 191 // A0: a b c d 192 __m128i bg_abcd = _mm_castps_si128( 193 _mm_shuffle_ps( 194 _mm_castsi128_ps(rgb_c_d), 195 _mm_castsi128_ps(rgb_a_b), 196 (3 << 6) | (1 << 4) | (3 << 2) | 1)); 197 __m128i r_abcd = _mm_castps_si128( 198 _mm_shuffle_ps( 199 _mm_castsi128_ps(rgb_c_d), 200 _mm_castsi128_ps(rgb_a_b), 201 (2 << 6) | (2 << 2))); 202 __m128i y_abcd = _mm_add_epi32(bg_abcd, r_abcd); 203 204 // Down shift back to 8bits range. 205 __m128i y_offset = _mm_load_si128( 206 reinterpret_cast<const __m128i*>(kYOffset)); 207 y_abcd = _mm_srai_epi32(y_abcd, FIX_SHIFT); 208 y_abcd = _mm_add_epi32(y_abcd, y_offset); 209 y_abcd = _mm_packs_epi32(y_abcd, y_abcd); 210 y_abcd = _mm_packus_epi16(y_abcd, y_abcd); 211 *reinterpret_cast<uint32*>(y_buf_1) = _mm_cvtsi128_si32(y_abcd); 212 y_buf_1 += 4; 213 214 // Second row 4 pixels. 215 __m128i rgb_row_2 = _mm_loadu_si128( 216 reinterpret_cast<const __m128i*>(rgb_buf_2)); 217 __m128i zero_2 = _mm_xor_si128(rgb_row_2, rgb_row_2); 218 __m128i rgb_e_f = _mm_unpackhi_epi8(rgb_row_2, zero_2); 219 __m128i rgb_g_h = _mm_unpacklo_epi8(rgb_row_2, zero_2); 220 221 // Add two rows together. 222 __m128i rgb_ae_bf = 223 _mm_add_epi16(_mm_unpackhi_epi8(rgb_row_1, zero_2), rgb_e_f); 224 __m128i rgb_cg_dh = 225 _mm_add_epi16(_mm_unpacklo_epi8(rgb_row_1, zero_2), rgb_g_h); 226 227 // Multiply add like the previous row. 228 rgb_e_f = _mm_madd_epi16(rgb_e_f, y_table); 229 rgb_g_h = _mm_madd_epi16(rgb_g_h, y_table); 230 231 __m128i bg_efgh = _mm_castps_si128( 232 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h), 233 _mm_castsi128_ps(rgb_e_f), 234 (3 << 6) | (1 << 4) | (3 << 2) | 1)); 235 __m128i r_efgh = _mm_castps_si128( 236 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h), 237 _mm_castsi128_ps(rgb_e_f), 238 (2 << 6) | (2 << 2))); 239 __m128i y_efgh = _mm_add_epi32(bg_efgh, r_efgh); 240 y_efgh = _mm_srai_epi32(y_efgh, FIX_SHIFT); 241 y_efgh = _mm_add_epi32(y_efgh, y_offset); 242 y_efgh = _mm_packs_epi32(y_efgh, y_efgh); 243 y_efgh = _mm_packus_epi16(y_efgh, y_efgh); 244 *reinterpret_cast<uint32*>(y_buf_2) = _mm_cvtsi128_si32(y_efgh); 245 y_buf_2 += 4; 246 247 __m128i rgb_ae_cg = _mm_castps_si128( 248 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), 249 _mm_castsi128_ps(rgb_ae_bf), 250 (3 << 6) | (2 << 4) | (3 << 2) | 2)); 251 __m128i rgb_bf_dh = _mm_castps_si128( 252 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), 253 _mm_castsi128_ps(rgb_ae_bf), 254 (1 << 6) | (1 << 2))); 255 256 // This is a 2x2 subsampling for 2 pixels. 257 __m128i rgb_abef_cdgh = _mm_add_epi16(rgb_ae_cg, rgb_bf_dh); 258 259 // Do a multiply add with U table. 260 __m128i u_a_b = _mm_madd_epi16( 261 rgb_abef_cdgh, 262 _mm_load_si128( 263 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 8))); 264 u_a_b = _mm_add_epi32(_mm_shuffle_epi32(u_a_b, ((3 << 2) | 1)), 265 _mm_shuffle_epi32(u_a_b, (2 << 2))); 266 // Right shift 14 because of 12 from fixed point and 2 from subsampling. 267 u_a_b = _mm_srai_epi32(u_a_b, FIX_SHIFT + 2); 268 __m128i uv_offset = _mm_slli_epi32(y_offset, 3); 269 u_a_b = _mm_add_epi32(u_a_b, uv_offset); 270 u_a_b = _mm_packs_epi32(u_a_b, u_a_b); 271 u_a_b = _mm_packus_epi16(u_a_b, u_a_b); 272 *reinterpret_cast<uint16*>(u_buf) = _mm_extract_epi16(u_a_b, 0); 273 u_buf += 2; 274 275 __m128i v_a_b = _mm_madd_epi16( 276 rgb_abef_cdgh, 277 _mm_load_si128( 278 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 16))); 279 v_a_b = _mm_add_epi32(_mm_shuffle_epi32(v_a_b, ((3 << 2) | 1)), 280 _mm_shuffle_epi32(v_a_b, (2 << 2))); 281 v_a_b = _mm_srai_epi32(v_a_b, FIX_SHIFT + 2); 282 v_a_b = _mm_add_epi32(v_a_b, uv_offset); 283 v_a_b = _mm_packs_epi32(v_a_b, v_a_b); 284 v_a_b = _mm_packus_epi16(v_a_b, v_a_b); 285 *reinterpret_cast<uint16*>(v_buf) = _mm_extract_epi16(v_a_b, 0); 286 v_buf += 2; 287 288 rgb_buf_1 += 16; 289 rgb_buf_2 += 16; 290 291 // Move forward by 4 pixels. 292 width -= 4; 293 } 294 295 // Just use C code to convert the remaining pixels. 296 if (width >= 2) { 297 ConvertRGBToYUV_V2H2(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); 298 rgb_buf_1 += 8; 299 rgb_buf_2 += 8; 300 y_buf_1 += 2; 301 y_buf_2 += 2; 302 ++u_buf; 303 ++v_buf; 304 width -= 2; 305 } 306 307 if (width) 308 ConvertRGBToYUV_V2H1(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); 309 } 310 311 extern void ConvertRGB32ToYUV_SSE2(const uint8* rgbframe, 312 uint8* yplane, 313 uint8* uplane, 314 uint8* vplane, 315 int width, 316 int height, 317 int rgbstride, 318 int ystride, 319 int uvstride) { 320 while (height >= 2) { 321 ConvertRGB32ToYUVRow_SSE2(rgbframe, 322 rgbframe + rgbstride, 323 yplane, 324 yplane + ystride, 325 uplane, 326 vplane, 327 width); 328 rgbframe += 2 * rgbstride; 329 yplane += 2 * ystride; 330 uplane += uvstride; 331 vplane += uvstride; 332 height -= 2; 333 } 334 335 if (!height) 336 return; 337 338 // Handle the last row. 339 while (width >= 2) { 340 ConvertRGBToYUV_V1H2(rgbframe, yplane, uplane, vplane); 341 rgbframe += 8; 342 yplane += 2; 343 ++uplane; 344 ++vplane; 345 width -= 2; 346 } 347 348 if (width) 349 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); 350 } 351 352 void ConvertRGB32ToYUV_SSE2_Reference(const uint8* rgbframe, 353 uint8* yplane, 354 uint8* uplane, 355 uint8* vplane, 356 int width, 357 int height, 358 int rgbstride, 359 int ystride, 360 int uvstride) { 361 while (height >= 2) { 362 int i = 0; 363 364 // Convert a 2x2 block. 365 while (i + 2 <= width) { 366 ConvertRGBToYUV_V2H2(rgbframe + i * 4, 367 rgbframe + rgbstride + i * 4, 368 yplane + i, 369 yplane + ystride + i, 370 uplane + i / 2, 371 vplane + i / 2); 372 i += 2; 373 } 374 375 // Convert the last pixel of two rows. 376 if (i < width) { 377 ConvertRGBToYUV_V2H1(rgbframe + i * 4, 378 rgbframe + rgbstride + i * 4, 379 yplane + i, 380 yplane + ystride + i, 381 uplane + i / 2, 382 vplane + i / 2); 383 } 384 385 rgbframe += 2 * rgbstride; 386 yplane += 2 * ystride; 387 uplane += uvstride; 388 vplane += uvstride; 389 height -= 2; 390 } 391 392 if (!height) 393 return; 394 395 // Handle the last row. 396 while (width >= 2) { 397 ConvertRGBToYUV_V1H2(rgbframe, yplane, uplane, vplane); 398 rgbframe += 8; 399 yplane += 2; 400 ++uplane; 401 ++vplane; 402 width -= 2; 403 } 404 405 // Handle the last pixel in the last row. 406 if (width) 407 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); 408 } 409 410 } // namespace media 411