1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "build/build_config.h" 6 #include "media/base/simd/convert_rgb_to_yuv.h" 7 #include "media/base/simd/yuv_to_rgb_table.h" 8 9 #if defined(COMPILER_MSVC) 10 #include <intrin.h> 11 #else 12 #include <mmintrin.h> 13 #include <emmintrin.h> 14 #endif 15 16 namespace media { 17 18 #define FIX_SHIFT 12 19 #define FIX(x) ((x) * (1 << FIX_SHIFT)) 20 21 // Define a convenient macro to do static cast. 22 #define INT16_FIX(x) static_cast<int16>(FIX(x)) 23 24 SIMD_ALIGNED(const int16 ConvertRGBAToYUV_kTable[8 * 3]) = { 25 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, 26 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, 27 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, 28 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, 29 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, 30 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, 31 }; 32 33 #undef INT16_FIX 34 35 // This is the final offset for the conversion from signed yuv values to 36 // unsigned values. It is arranged so that offset of 16 is applied to Y 37 // components and 128 is added to UV components for 2 pixels. 38 SIMD_ALIGNED(const int32 kYOffset[4]) = {16, 16, 16, 16}; 39 40 static inline int Clamp(int value) { 41 if (value < 0) 42 return 0; 43 if (value > 255) 44 return 255; 45 return value; 46 } 47 48 static inline int RGBToY(int r, int g, int b) { 49 int y = ConvertRGBAToYUV_kTable[0] * b + 50 ConvertRGBAToYUV_kTable[1] * g + 51 ConvertRGBAToYUV_kTable[2] * r; 52 y >>= FIX_SHIFT; 53 return Clamp(y + 16); 54 } 55 56 static inline int RGBToU(int r, int g, int b, int shift) { 57 int u = ConvertRGBAToYUV_kTable[8] * b + 58 ConvertRGBAToYUV_kTable[9] * g + 59 ConvertRGBAToYUV_kTable[10] * r; 60 u >>= FIX_SHIFT + shift; 61 return Clamp(u + 128); 62 } 63 64 static inline int RGBToV(int r, int g, int b, int shift) { 65 int v = ConvertRGBAToYUV_kTable[16] * b + 66 ConvertRGBAToYUV_kTable[17] * g + 67 ConvertRGBAToYUV_kTable[18] * r; 68 v >>= FIX_SHIFT + shift; 69 return Clamp(v + 128); 70 } 71 72 #define CONVERT_Y(rgb_buf, y_buf) \ 73 b = *rgb_buf++; \ 74 g = *rgb_buf++; \ 75 r = *rgb_buf++; \ 76 ++rgb_buf; \ 77 sum_b += b; \ 78 sum_g += g; \ 79 sum_r += r; \ 80 *y_buf++ = RGBToY(r, g, b); 81 82 static inline void ConvertRGBToYUV_V2H2(const uint8* rgb_buf_1, 83 const uint8* rgb_buf_2, 84 uint8* y_buf_1, 85 uint8* y_buf_2, 86 uint8* u_buf, 87 uint8* v_buf) { 88 int sum_b = 0; 89 int sum_g = 0; 90 int sum_r = 0; 91 int r, g, b; 92 93 94 95 CONVERT_Y(rgb_buf_1, y_buf_1); 96 CONVERT_Y(rgb_buf_1, y_buf_1); 97 CONVERT_Y(rgb_buf_2, y_buf_2); 98 CONVERT_Y(rgb_buf_2, y_buf_2); 99 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 2); 100 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 2); 101 } 102 103 static inline void ConvertRGBToYUV_V2H1(const uint8* rgb_buf_1, 104 const uint8* rgb_buf_2, 105 uint8* y_buf_1, 106 uint8* y_buf_2, 107 uint8* u_buf, 108 uint8* v_buf) { 109 int sum_b = 0; 110 int sum_g = 0; 111 int sum_r = 0; 112 int r, g, b; 113 114 CONVERT_Y(rgb_buf_1, y_buf_1); 115 CONVERT_Y(rgb_buf_2, y_buf_2); 116 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); 117 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); 118 } 119 120 static inline void ConvertRGBToYUV_V1H2(const uint8* rgb_buf, 121 uint8* y_buf, 122 uint8* u_buf, 123 uint8* v_buf) { 124 int sum_b = 0; 125 int sum_g = 0; 126 int sum_r = 0; 127 int r, g, b; 128 129 CONVERT_Y(rgb_buf, y_buf); 130 CONVERT_Y(rgb_buf, y_buf); 131 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); 132 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); 133 } 134 135 static inline void ConvertRGBToYUV_V1H1(const uint8* rgb_buf, 136 uint8* y_buf, 137 uint8* u_buf, 138 uint8* v_buf) { 139 int sum_b = 0; 140 int sum_g = 0; 141 int sum_r = 0; 142 int r, g, b; 143 144 CONVERT_Y(rgb_buf, y_buf); 145 *u_buf++ = RGBToU(r, g, b, 0); 146 *v_buf++ = RGBToV(r, g, b, 0); 147 } 148 149 static void ConvertRGB32ToYUVRow_SSE2(const uint8* rgb_buf_1, 150 const uint8* rgb_buf_2, 151 uint8* y_buf_1, 152 uint8* y_buf_2, 153 uint8* u_buf, 154 uint8* v_buf, 155 int width) { 156 while (width >= 4) { 157 // Name for the Y pixels: 158 // Row 1: a b c d 159 // Row 2: e f g h 160 // 161 // First row 4 pixels. 162 __m128i rgb_row_1 = _mm_loadu_si128( 163 reinterpret_cast<const __m128i*>(rgb_buf_1)); 164 __m128i zero_1 = _mm_xor_si128(rgb_row_1, rgb_row_1); 165 166 __m128i y_table = _mm_load_si128( 167 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable)); 168 169 __m128i rgb_a_b = _mm_unpackhi_epi8(rgb_row_1, zero_1); 170 rgb_a_b = _mm_madd_epi16(rgb_a_b, y_table); 171 172 __m128i rgb_c_d = _mm_unpacklo_epi8(rgb_row_1, zero_1); 173 rgb_c_d = _mm_madd_epi16(rgb_c_d, y_table); 174 175 // Do a crazh shuffle so that we get: 176 // v------------ Multiply Add 177 // BG: a b c d 178 // A0: a b c d 179 __m128i bg_abcd = _mm_castps_si128( 180 _mm_shuffle_ps( 181 _mm_castsi128_ps(rgb_c_d), 182 _mm_castsi128_ps(rgb_a_b), 183 (3 << 6) | (1 << 4) | (3 << 2) | 1)); 184 __m128i r_abcd = _mm_castps_si128( 185 _mm_shuffle_ps( 186 _mm_castsi128_ps(rgb_c_d), 187 _mm_castsi128_ps(rgb_a_b), 188 (2 << 6) | (2 << 2))); 189 __m128i y_abcd = _mm_add_epi32(bg_abcd, r_abcd); 190 191 // Down shift back to 8bits range. 192 __m128i y_offset = _mm_load_si128( 193 reinterpret_cast<const __m128i*>(kYOffset)); 194 y_abcd = _mm_srai_epi32(y_abcd, FIX_SHIFT); 195 y_abcd = _mm_add_epi32(y_abcd, y_offset); 196 y_abcd = _mm_packs_epi32(y_abcd, y_abcd); 197 y_abcd = _mm_packus_epi16(y_abcd, y_abcd); 198 *reinterpret_cast<uint32*>(y_buf_1) = _mm_cvtsi128_si32(y_abcd); 199 y_buf_1 += 4; 200 201 // Second row 4 pixels. 202 __m128i rgb_row_2 = _mm_loadu_si128( 203 reinterpret_cast<const __m128i*>(rgb_buf_2)); 204 __m128i zero_2 = _mm_xor_si128(rgb_row_2, rgb_row_2); 205 __m128i rgb_e_f = _mm_unpackhi_epi8(rgb_row_2, zero_2); 206 __m128i rgb_g_h = _mm_unpacklo_epi8(rgb_row_2, zero_2); 207 208 // Add two rows together. 209 __m128i rgb_ae_bf = 210 _mm_add_epi16(_mm_unpackhi_epi8(rgb_row_1, zero_2), rgb_e_f); 211 __m128i rgb_cg_dh = 212 _mm_add_epi16(_mm_unpacklo_epi8(rgb_row_1, zero_2), rgb_g_h); 213 214 // Multiply add like the previous row. 215 rgb_e_f = _mm_madd_epi16(rgb_e_f, y_table); 216 rgb_g_h = _mm_madd_epi16(rgb_g_h, y_table); 217 218 __m128i bg_efgh = _mm_castps_si128( 219 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h), 220 _mm_castsi128_ps(rgb_e_f), 221 (3 << 6) | (1 << 4) | (3 << 2) | 1)); 222 __m128i r_efgh = _mm_castps_si128( 223 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h), 224 _mm_castsi128_ps(rgb_e_f), 225 (2 << 6) | (2 << 2))); 226 __m128i y_efgh = _mm_add_epi32(bg_efgh, r_efgh); 227 y_efgh = _mm_srai_epi32(y_efgh, FIX_SHIFT); 228 y_efgh = _mm_add_epi32(y_efgh, y_offset); 229 y_efgh = _mm_packs_epi32(y_efgh, y_efgh); 230 y_efgh = _mm_packus_epi16(y_efgh, y_efgh); 231 *reinterpret_cast<uint32*>(y_buf_2) = _mm_cvtsi128_si32(y_efgh); 232 y_buf_2 += 4; 233 234 __m128i rgb_ae_cg = _mm_castps_si128( 235 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), 236 _mm_castsi128_ps(rgb_ae_bf), 237 (3 << 6) | (2 << 4) | (3 << 2) | 2)); 238 __m128i rgb_bf_dh = _mm_castps_si128( 239 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), 240 _mm_castsi128_ps(rgb_ae_bf), 241 (1 << 6) | (1 << 2))); 242 243 // This is a 2x2 subsampling for 2 pixels. 244 __m128i rgb_abef_cdgh = _mm_add_epi16(rgb_ae_cg, rgb_bf_dh); 245 246 // Do a multiply add with U table. 247 __m128i u_a_b = _mm_madd_epi16( 248 rgb_abef_cdgh, 249 _mm_load_si128( 250 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 8))); 251 u_a_b = _mm_add_epi32(_mm_shuffle_epi32(u_a_b, ((3 << 2) | 1)), 252 _mm_shuffle_epi32(u_a_b, (2 << 2))); 253 // Right shift 14 because of 12 from fixed point and 2 from subsampling. 254 u_a_b = _mm_srai_epi32(u_a_b, FIX_SHIFT + 2); 255 __m128i uv_offset = _mm_slli_epi32(y_offset, 3); 256 u_a_b = _mm_add_epi32(u_a_b, uv_offset); 257 u_a_b = _mm_packs_epi32(u_a_b, u_a_b); 258 u_a_b = _mm_packus_epi16(u_a_b, u_a_b); 259 *reinterpret_cast<uint16*>(u_buf) = _mm_extract_epi16(u_a_b, 0); 260 u_buf += 2; 261 262 __m128i v_a_b = _mm_madd_epi16( 263 rgb_abef_cdgh, 264 _mm_load_si128( 265 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 16))); 266 v_a_b = _mm_add_epi32(_mm_shuffle_epi32(v_a_b, ((3 << 2) | 1)), 267 _mm_shuffle_epi32(v_a_b, (2 << 2))); 268 v_a_b = _mm_srai_epi32(v_a_b, FIX_SHIFT + 2); 269 v_a_b = _mm_add_epi32(v_a_b, uv_offset); 270 v_a_b = _mm_packs_epi32(v_a_b, v_a_b); 271 v_a_b = _mm_packus_epi16(v_a_b, v_a_b); 272 *reinterpret_cast<uint16*>(v_buf) = _mm_extract_epi16(v_a_b, 0); 273 v_buf += 2; 274 275 rgb_buf_1 += 16; 276 rgb_buf_2 += 16; 277 278 // Move forward by 4 pixels. 279 width -= 4; 280 } 281 282 // Just use C code to convert the remaining pixels. 283 if (width >= 2) { 284 ConvertRGBToYUV_V2H2(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); 285 rgb_buf_1 += 8; 286 rgb_buf_2 += 8; 287 y_buf_1 += 2; 288 y_buf_2 += 2; 289 ++u_buf; 290 ++v_buf; 291 width -= 2; 292 } 293 294 if (width) 295 ConvertRGBToYUV_V2H1(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); 296 } 297 298 extern void ConvertRGB32ToYUV_SSE2(const uint8* rgbframe, 299 uint8* yplane, 300 uint8* uplane, 301 uint8* vplane, 302 int width, 303 int height, 304 int rgbstride, 305 int ystride, 306 int uvstride) { 307 while (height >= 2) { 308 ConvertRGB32ToYUVRow_SSE2(rgbframe, 309 rgbframe + rgbstride, 310 yplane, 311 yplane + ystride, 312 uplane, 313 vplane, 314 width); 315 rgbframe += 2 * rgbstride; 316 yplane += 2 * ystride; 317 uplane += uvstride; 318 vplane += uvstride; 319 height -= 2; 320 } 321 322 if (!height) 323 return; 324 325 // Handle the last row. 326 while (width >= 2) { 327 ConvertRGBToYUV_V1H2(rgbframe, yplane, uplane, vplane); 328 rgbframe += 8; 329 yplane += 2; 330 ++uplane; 331 ++vplane; 332 width -= 2; 333 } 334 335 if (width) 336 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); 337 } 338 339 void ConvertRGB32ToYUV_SSE2_Reference(const uint8* rgbframe, 340 uint8* yplane, 341 uint8* uplane, 342 uint8* vplane, 343 int width, 344 int height, 345 int rgbstride, 346 int ystride, 347 int uvstride) { 348 while (height >= 2) { 349 int i = 0; 350 351 // Convert a 2x2 block. 352 while (i + 2 <= width) { 353 ConvertRGBToYUV_V2H2(rgbframe + i * 4, 354 rgbframe + rgbstride + i * 4, 355 yplane + i, 356 yplane + ystride + i, 357 uplane + i / 2, 358 vplane + i / 2); 359 i += 2; 360 } 361 362 // Convert the last pixel of two rows. 363 if (i < width) { 364 ConvertRGBToYUV_V2H1(rgbframe + i * 4, 365 rgbframe + rgbstride + i * 4, 366 yplane + i, 367 yplane + ystride + i, 368 uplane + i / 2, 369 vplane + i / 2); 370 } 371 372 rgbframe += 2 * rgbstride; 373 yplane += 2 * ystride; 374 uplane += uvstride; 375 vplane += uvstride; 376 height -= 2; 377 } 378 379 if (!height) 380 return; 381 382 // Handle the last row. 383 while (width >= 2) { 384 ConvertRGBToYUV_V1H2(rgbframe, yplane, uplane, vplane); 385 rgbframe += 8; 386 yplane += 2; 387 ++uplane; 388 ++vplane; 389 width -= 2; 390 } 391 392 // Handle the last pixel in the last row. 393 if (width) 394 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); 395 } 396 397 } // namespace media 398