1 /* 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "libyuv/row.h" 12 13 #include <string.h> // For memcpy 14 15 #include "libyuv/basic_types.h" 16 17 #ifdef __cplusplus 18 namespace libyuv { 19 extern "C" { 20 #endif 21 22 void BGRAToARGBRow_C(const uint8* src_bgra, uint8* dst_argb, int width) { 23 for (int x = 0; x < width; ++x) { 24 // To support in-place conversion. 25 uint8 a = src_bgra[0]; 26 uint8 r = src_bgra[1]; 27 uint8 g = src_bgra[2]; 28 uint8 b = src_bgra[3]; 29 dst_argb[0] = b; 30 dst_argb[1] = g; 31 dst_argb[2] = r; 32 dst_argb[3] = a; 33 dst_argb += 4; 34 src_bgra += 4; 35 } 36 } 37 38 void ABGRToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int width) { 39 for (int x = 0; x < width; ++x) { 40 // To support in-place conversion. 41 uint8 r = src_abgr[0]; 42 uint8 g = src_abgr[1]; 43 uint8 b = src_abgr[2]; 44 uint8 a = src_abgr[3]; 45 dst_argb[0] = b; 46 dst_argb[1] = g; 47 dst_argb[2] = r; 48 dst_argb[3] = a; 49 dst_argb += 4; 50 src_abgr += 4; 51 } 52 } 53 54 void RGBAToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int width) { 55 for (int x = 0; x < width; ++x) { 56 // To support in-place conversion. 57 uint8 a = src_abgr[0]; 58 uint8 b = src_abgr[1]; 59 uint8 g = src_abgr[2]; 60 uint8 r = src_abgr[3]; 61 dst_argb[0] = b; 62 dst_argb[1] = g; 63 dst_argb[2] = r; 64 dst_argb[3] = a; 65 dst_argb += 4; 66 src_abgr += 4; 67 } 68 } 69 70 void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) { 71 for (int x = 0; x < width; ++x) { 72 uint8 b = src_rgb24[0]; 73 uint8 g = src_rgb24[1]; 74 uint8 r = src_rgb24[2]; 75 dst_argb[0] = b; 76 dst_argb[1] = g; 77 dst_argb[2] = r; 78 dst_argb[3] = 255u; 79 dst_argb += 4; 80 src_rgb24 += 3; 81 } 82 } 83 84 void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) { 85 for (int x = 0; x < width; ++x) { 86 uint8 r = src_raw[0]; 87 uint8 g = src_raw[1]; 88 uint8 b = src_raw[2]; 89 dst_argb[0] = b; 90 dst_argb[1] = g; 91 dst_argb[2] = r; 92 dst_argb[3] = 255u; 93 dst_argb += 4; 94 src_raw += 3; 95 } 96 } 97 98 void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width) { 99 for (int x = 0; x < width; ++x) { 100 uint8 b = src_rgb[0] & 0x1f; 101 uint8 g = (src_rgb[0] >> 5) | ((src_rgb[1] & 0x07) << 3); 102 uint8 r = src_rgb[1] >> 3; 103 dst_argb[0] = (b << 3) | (b >> 2); 104 dst_argb[1] = (g << 2) | (g >> 4); 105 dst_argb[2] = (r << 3) | (r >> 2); 106 dst_argb[3] = 255u; 107 dst_argb += 4; 108 src_rgb += 2; 109 } 110 } 111 112 void ARGB1555ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width) { 113 for (int x = 0; x < width; ++x) { 114 uint8 b = src_rgb[0] & 0x1f; 115 uint8 g = (src_rgb[0] >> 5) | ((src_rgb[1] & 0x03) << 3); 116 uint8 r = (src_rgb[1] & 0x7c) >> 2; 117 uint8 a = src_rgb[1] >> 7; 118 dst_argb[0] = (b << 3) | (b >> 2); 119 dst_argb[1] = (g << 3) | (g >> 2); 120 dst_argb[2] = (r << 3) | (r >> 2); 121 dst_argb[3] = -a; 122 dst_argb += 4; 123 src_rgb += 2; 124 } 125 } 126 127 void ARGB4444ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width) { 128 for (int x = 0; x < width; ++x) { 129 uint8 b = src_rgb[0] & 0x0f; 130 uint8 g = src_rgb[0] >> 4; 131 uint8 r = src_rgb[1] & 0x0f; 132 uint8 a = src_rgb[1] >> 4; 133 dst_argb[0] = (b << 4) | b; 134 dst_argb[1] = (g << 4) | g; 135 dst_argb[2] = (r << 4) | r; 136 dst_argb[3] = (a << 4) | a; 137 dst_argb += 4; 138 src_rgb += 2; 139 } 140 } 141 142 void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int width) { 143 for (int x = 0; x < width; ++x) { 144 uint8 b = src_argb[0]; 145 uint8 g = src_argb[1]; 146 uint8 r = src_argb[2]; 147 uint8 a = src_argb[3]; 148 dst_rgb[0] = a; 149 dst_rgb[1] = b; 150 dst_rgb[2] = g; 151 dst_rgb[3] = r; 152 dst_rgb += 4; 153 src_argb += 4; 154 } 155 } 156 157 void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 158 for (int x = 0; x < width; ++x) { 159 uint8 b = src_argb[0]; 160 uint8 g = src_argb[1]; 161 uint8 r = src_argb[2]; 162 dst_rgb[0] = b; 163 dst_rgb[1] = g; 164 dst_rgb[2] = r; 165 dst_rgb += 3; 166 src_argb += 4; 167 } 168 } 169 170 void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) { 171 for (int x = 0; x < width; ++x) { 172 uint8 b = src_argb[0]; 173 uint8 g = src_argb[1]; 174 uint8 r = src_argb[2]; 175 dst_rgb[0] = r; 176 dst_rgb[1] = g; 177 dst_rgb[2] = b; 178 dst_rgb += 3; 179 src_argb += 4; 180 } 181 } 182 183 // TODO(fbarchard): support big endian CPU 184 void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 185 for (int x = 0; x < width - 1; x += 2) { 186 uint8 b0 = src_argb[0] >> 3; 187 uint8 g0 = src_argb[1] >> 2; 188 uint8 r0 = src_argb[2] >> 3; 189 uint8 b1 = src_argb[4] >> 3; 190 uint8 g1 = src_argb[5] >> 2; 191 uint8 r1 = src_argb[6] >> 3; 192 *reinterpret_cast<uint32*>(dst_rgb) = b0 | (g0 << 5) | (r0 << 11) | 193 (b1 << 16) | (g1 << 21) | (r1 << 27); 194 dst_rgb += 4; 195 src_argb += 8; 196 } 197 if (width & 1) { 198 uint8 b0 = src_argb[0] >> 3; 199 uint8 g0 = src_argb[1] >> 2; 200 uint8 r0 = src_argb[2] >> 3; 201 *reinterpret_cast<uint16*>(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); 202 } 203 } 204 205 void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 206 for (int x = 0; x < width - 1; x += 2) { 207 uint8 b0 = src_argb[0] >> 3; 208 uint8 g0 = src_argb[1] >> 3; 209 uint8 r0 = src_argb[2] >> 3; 210 uint8 a0 = src_argb[3] >> 7; 211 uint8 b1 = src_argb[4] >> 3; 212 uint8 g1 = src_argb[5] >> 3; 213 uint8 r1 = src_argb[6] >> 3; 214 uint8 a1 = src_argb[7] >> 7; 215 *reinterpret_cast<uint32*>(dst_rgb) = 216 b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) | 217 (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31); 218 dst_rgb += 4; 219 src_argb += 8; 220 } 221 if (width & 1) { 222 uint8 b0 = src_argb[0] >> 3; 223 uint8 g0 = src_argb[1] >> 3; 224 uint8 r0 = src_argb[2] >> 3; 225 uint8 a0 = src_argb[3] >> 7; 226 *reinterpret_cast<uint16*>(dst_rgb) = 227 b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); 228 } 229 } 230 231 void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 232 for (int x = 0; x < width - 1; x += 2) { 233 uint8 b0 = src_argb[0] >> 4; 234 uint8 g0 = src_argb[1] >> 4; 235 uint8 r0 = src_argb[2] >> 4; 236 uint8 a0 = src_argb[3] >> 4; 237 uint8 b1 = src_argb[4] >> 4; 238 uint8 g1 = src_argb[5] >> 4; 239 uint8 r1 = src_argb[6] >> 4; 240 uint8 a1 = src_argb[7] >> 4; 241 *reinterpret_cast<uint32*>(dst_rgb) = 242 b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) | 243 (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28); 244 dst_rgb += 4; 245 src_argb += 8; 246 } 247 if (width & 1) { 248 uint8 b0 = src_argb[0] >> 4; 249 uint8 g0 = src_argb[1] >> 4; 250 uint8 r0 = src_argb[2] >> 4; 251 uint8 a0 = src_argb[3] >> 4; 252 *reinterpret_cast<uint16*>(dst_rgb) = 253 b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); 254 } 255 } 256 257 static __inline int RGBToY(uint8 r, uint8 g, uint8 b) { 258 return (( 66 * r + 129 * g + 25 * b + 128) >> 8) + 16; 259 } 260 261 static __inline int RGBToU(uint8 r, uint8 g, uint8 b) { 262 return ((-38 * r - 74 * g + 112 * b + 128) >> 8) + 128; 263 } 264 static __inline int RGBToV(uint8 r, uint8 g, uint8 b) { 265 return ((112 * r - 94 * g - 18 * b + 128) >> 8) + 128; 266 } 267 268 #define MAKEROWY(NAME, R, G, B) \ 269 void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ 270 for (int x = 0; x < width; ++x) { \ 271 dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \ 272 src_argb0 += 4; \ 273 dst_y += 1; \ 274 } \ 275 } \ 276 void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \ 277 uint8* dst_u, uint8* dst_v, int width) { \ 278 const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ 279 for (int x = 0; x < width - 1; x += 2) { \ 280 uint8 ab = (src_rgb0[B] + src_rgb0[B + 4] + \ 281 src_rgb1[B] + src_rgb1[B + 4]) >> 2; \ 282 uint8 ag = (src_rgb0[G] + src_rgb0[G + 4] + \ 283 src_rgb1[G] + src_rgb1[G + 4]) >> 2; \ 284 uint8 ar = (src_rgb0[R] + src_rgb0[R + 4] + \ 285 src_rgb1[R] + src_rgb1[R + 4]) >> 2; \ 286 dst_u[0] = RGBToU(ar, ag, ab); \ 287 dst_v[0] = RGBToV(ar, ag, ab); \ 288 src_rgb0 += 8; \ 289 src_rgb1 += 8; \ 290 dst_u += 1; \ 291 dst_v += 1; \ 292 } \ 293 if (width & 1) { \ 294 uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \ 295 uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \ 296 uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \ 297 dst_u[0] = RGBToU(ar, ag, ab); \ 298 dst_v[0] = RGBToV(ar, ag, ab); \ 299 } \ 300 } 301 302 MAKEROWY(ARGB, 2, 1, 0) 303 MAKEROWY(BGRA, 1, 2, 3) 304 MAKEROWY(ABGR, 0, 1, 2) 305 MAKEROWY(RGBA, 3, 2, 1) 306 307 // http://en.wikipedia.org/wiki/Grayscale. 308 // 0.11 * B + 0.59 * G + 0.30 * R 309 // Coefficients rounded to multiple of 2 for consistency with SSSE3 version. 310 static __inline int RGBToGray(uint8 r, uint8 g, uint8 b) { 311 return (( 76 * r + 152 * g + 28 * b) >> 8); 312 } 313 314 void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) { 315 for (int x = 0; x < width; ++x) { 316 uint8 y = RGBToGray(src_argb[2], src_argb[1], src_argb[0]); 317 dst_argb[2] = dst_argb[1] = dst_argb[0] = y; 318 dst_argb[3] = src_argb[3]; 319 dst_argb += 4; 320 src_argb += 4; 321 } 322 } 323 324 // Convert a row of image to Sepia tone. 325 void ARGBSepiaRow_C(uint8* dst_argb, int width) { 326 for (int x = 0; x < width; ++x) { 327 int b = dst_argb[0]; 328 int g = dst_argb[1]; 329 int r = dst_argb[2]; 330 int sb = (b * 17 + g * 68 + r * 35) >> 7; 331 int sg = (b * 22 + g * 88 + r * 45) >> 7; 332 int sr = (b * 24 + g * 98 + r * 50) >> 7; 333 // b does not over flow. a is preserved from original. 334 if (sg > 255) { 335 sg = 255; 336 } 337 if (sr > 255) { 338 sr = 255; 339 } 340 dst_argb[0] = sb; 341 dst_argb[1] = sg; 342 dst_argb[2] = sr; 343 dst_argb += 4; 344 } 345 } 346 347 // Apply color matrix to a row of image. Matrix is signed. 348 void ARGBColorMatrixRow_C(uint8* dst_argb, const int8* matrix_argb, int width) { 349 for (int x = 0; x < width; ++x) { 350 int b = dst_argb[0]; 351 int g = dst_argb[1]; 352 int r = dst_argb[2]; 353 int a = dst_argb[3]; 354 int sb = (b * matrix_argb[0] + g * matrix_argb[1] + 355 r * matrix_argb[2] + a * matrix_argb[3]) >> 7; 356 int sg = (b * matrix_argb[4] + g * matrix_argb[5] + 357 r * matrix_argb[6] + a * matrix_argb[7]) >> 7; 358 int sr = (b * matrix_argb[8] + g * matrix_argb[9] + 359 r * matrix_argb[10] + a * matrix_argb[11]) >> 7; 360 if (sb < 0) { 361 sb = 0; 362 } 363 if (sb > 255) { 364 sb = 255; 365 } 366 if (sg < 0) { 367 sg = 0; 368 } 369 if (sg > 255) { 370 sg = 255; 371 } 372 if (sr < 0) { 373 sr = 0; 374 } 375 if (sr > 255) { 376 sr = 255; 377 } 378 dst_argb[0] = sb; 379 dst_argb[1] = sg; 380 dst_argb[2] = sr; 381 dst_argb += 4; 382 } 383 } 384 385 // Apply color table to a row of image. 386 void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { 387 for (int x = 0; x < width; ++x) { 388 int b = dst_argb[0]; 389 int g = dst_argb[1]; 390 int r = dst_argb[2]; 391 int a = dst_argb[3]; 392 dst_argb[0] = table_argb[b * 4 + 0]; 393 dst_argb[1] = table_argb[g * 4 + 1]; 394 dst_argb[2] = table_argb[r * 4 + 2]; 395 dst_argb[3] = table_argb[a * 4 + 3]; 396 dst_argb += 4; 397 } 398 } 399 400 void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size, 401 int interval_offset, int width) { 402 for (int x = 0; x < width; ++x) { 403 int b = dst_argb[0]; 404 int g = dst_argb[1]; 405 int r = dst_argb[2]; 406 dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; 407 dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset; 408 dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset; 409 dst_argb += 4; 410 } 411 } 412 413 void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) { 414 // Copy a Y to RGB. 415 for (int x = 0; x < width; ++x) { 416 uint8 y = src_y[0]; 417 dst_argb[2] = dst_argb[1] = dst_argb[0] = y; 418 dst_argb[3] = 255u; 419 dst_argb += 4; 420 ++src_y; 421 } 422 } 423 424 // C reference code that mimics the YUV assembly. 425 426 #define YG 74 /* static_cast<int8>(1.164 * 64 + 0.5) */ 427 428 #define UB 127 /* min(63,static_cast<int8>(2.018 * 64)) */ 429 #define UG -25 /* static_cast<int8>(-0.391 * 64 - 0.5) */ 430 #define UR 0 431 432 #define VB 0 433 #define VG -52 /* static_cast<int8>(-0.813 * 64 - 0.5) */ 434 #define VR 102 /* static_cast<int8>(1.596 * 64 + 0.5) */ 435 436 // Bias 437 #define BB UB * 128 + VB * 128 438 #define BG UG * 128 + VG * 128 439 #define BR UR * 128 + VR * 128 440 441 static __inline uint32 Clip(int32 val) { 442 if (val < 0) { 443 return static_cast<uint32>(0); 444 } else if (val > 255) { 445 return static_cast<uint32>(255); 446 } 447 return static_cast<uint32>(val); 448 } 449 450 static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, uint8* rgb_buf, 451 int ashift, int rshift, int gshift, int bshift) { 452 int32 y1 = (static_cast<int32>(y) - 16) * YG; 453 uint32 b = Clip(static_cast<int32>((u * UB + v * VB) - (BB) + y1) >> 6); 454 uint32 g = Clip(static_cast<int32>((u * UG + v * VG) - (BG) + y1) >> 6); 455 uint32 r = Clip(static_cast<int32>((u * UR + v * VR) - (BR) + y1) >> 6); 456 *reinterpret_cast<uint32*>(rgb_buf) = (b << bshift) | 457 (g << gshift) | 458 (r << rshift) | 459 (255u << ashift); 460 } 461 462 static __inline void YuvPixel2(uint8 y, uint8 u, uint8 v, 463 uint8* b, uint8* g, uint8* r) { 464 int32 y1 = (static_cast<int32>(y) - 16) * YG; 465 *b = Clip(static_cast<int32>((u * UB + v * VB) - (BB) + y1) >> 6); 466 *g = Clip(static_cast<int32>((u * UG + v * VG) - (BG) + y1) >> 6); 467 *r = Clip(static_cast<int32>((u * UR + v * VR) - (BR) + y1) >> 6); 468 } 469 470 void I444ToARGBRow_C(const uint8* y_buf, 471 const uint8* u_buf, 472 const uint8* v_buf, 473 uint8* rgb_buf, 474 int width) { 475 for (int x = 0; x < width; ++x) { 476 YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf, 24, 16, 8, 0); 477 y_buf += 1; 478 u_buf += 1; 479 v_buf += 1; 480 rgb_buf += 4; // Advance 1 pixel. 481 } 482 } 483 484 // Also used for 420 485 void I422ToARGBRow_C(const uint8* y_buf, 486 const uint8* u_buf, 487 const uint8* v_buf, 488 uint8* rgb_buf, 489 int width) { 490 for (int x = 0; x < width - 1; x += 2) { 491 YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); 492 YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0); 493 y_buf += 2; 494 u_buf += 1; 495 v_buf += 1; 496 rgb_buf += 8; // Advance 2 pixels. 497 } 498 if (width & 1) { 499 YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); 500 } 501 } 502 503 void I422ToRGB24Row_C(const uint8* y_buf, 504 const uint8* u_buf, 505 const uint8* v_buf, 506 uint8* rgb_buf, 507 int width) { 508 for (int x = 0; x < width - 1; x += 2) { 509 YuvPixel2(y_buf[0], u_buf[0], v_buf[0], 510 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 511 YuvPixel2(y_buf[1], u_buf[0], v_buf[0], 512 rgb_buf + 3, rgb_buf + 4, rgb_buf + 5); 513 y_buf += 2; 514 u_buf += 1; 515 v_buf += 1; 516 rgb_buf += 6; // Advance 2 pixels. 517 } 518 if (width & 1) { 519 YuvPixel2(y_buf[0], u_buf[0], v_buf[0], 520 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 521 } 522 } 523 524 void I422ToRAWRow_C(const uint8* y_buf, 525 const uint8* u_buf, 526 const uint8* v_buf, 527 uint8* rgb_buf, 528 int width) { 529 for (int x = 0; x < width - 1; x += 2) { 530 YuvPixel2(y_buf[0], u_buf[0], v_buf[0], 531 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); 532 YuvPixel2(y_buf[1], u_buf[0], v_buf[0], 533 rgb_buf + 5, rgb_buf + 4, rgb_buf + 3); 534 y_buf += 2; 535 u_buf += 1; 536 v_buf += 1; 537 rgb_buf += 6; // Advance 2 pixels. 538 } 539 if (width & 1) { 540 YuvPixel2(y_buf[0], u_buf[0], v_buf[0], 541 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 542 } 543 } 544 545 void I411ToARGBRow_C(const uint8* y_buf, 546 const uint8* u_buf, 547 const uint8* v_buf, 548 uint8* rgb_buf, 549 int width) { 550 for (int x = 0; x < width - 3; x += 4) { 551 YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); 552 YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0); 553 YuvPixel(y_buf[2], u_buf[0], v_buf[0], rgb_buf + 8, 24, 16, 8, 0); 554 YuvPixel(y_buf[3], u_buf[0], v_buf[0], rgb_buf + 12, 24, 16, 8, 0); 555 y_buf += 4; 556 u_buf += 1; 557 v_buf += 1; 558 rgb_buf += 16; // Advance 4 pixels. 559 } 560 if (width & 2) { 561 YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); 562 YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0); 563 y_buf += 2; 564 rgb_buf += 8; // Advance 2 pixels. 565 } 566 if (width & 1) { 567 YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); 568 } 569 } 570 571 void NV12ToARGBRow_C(const uint8* y_buf, 572 const uint8* uv_buf, 573 uint8* rgb_buf, 574 int width) { 575 for (int x = 0; x < width - 1; x += 2) { 576 YuvPixel(y_buf[0], uv_buf[0], uv_buf[1], rgb_buf + 0, 24, 16, 8, 0); 577 YuvPixel(y_buf[1], uv_buf[0], uv_buf[1], rgb_buf + 4, 24, 16, 8, 0); 578 y_buf += 2; 579 uv_buf += 2; 580 rgb_buf += 8; // Advance 2 pixels. 581 } 582 if (width & 1) { 583 YuvPixel(y_buf[0], uv_buf[0], uv_buf[1], rgb_buf + 0, 24, 16, 8, 0); 584 } 585 } 586 587 void NV21ToARGBRow_C(const uint8* y_buf, 588 const uint8* vu_buf, 589 uint8* rgb_buf, 590 int width) { 591 for (int x = 0; x < width - 1; x += 2) { 592 YuvPixel(y_buf[0], vu_buf[1], vu_buf[0], rgb_buf + 0, 24, 16, 8, 0); 593 YuvPixel(y_buf[1], vu_buf[1], vu_buf[0], rgb_buf + 4, 24, 16, 8, 0); 594 y_buf += 2; 595 vu_buf += 2; 596 rgb_buf += 8; // Advance 2 pixels. 597 } 598 if (width & 1) { 599 YuvPixel(y_buf[0], vu_buf[1], vu_buf[0], rgb_buf + 0, 24, 16, 8, 0); 600 } 601 } 602 603 void I422ToBGRARow_C(const uint8* y_buf, 604 const uint8* u_buf, 605 const uint8* v_buf, 606 uint8* rgb_buf, 607 int width) { 608 for (int x = 0; x < width - 1; x += 2) { 609 YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 8, 16, 24); 610 YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 0, 8, 16, 24); 611 y_buf += 2; 612 u_buf += 1; 613 v_buf += 1; 614 rgb_buf += 8; // Advance 2 pixels. 615 } 616 if (width & 1) { 617 YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf, 0, 8, 16, 24); 618 } 619 } 620 621 void I422ToABGRRow_C(const uint8* y_buf, 622 const uint8* u_buf, 623 const uint8* v_buf, 624 uint8* rgb_buf, 625 int width) { 626 for (int x = 0; x < width - 1; x += 2) { 627 YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 0, 8, 16); 628 YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 0, 8, 16); 629 y_buf += 2; 630 u_buf += 1; 631 v_buf += 1; 632 rgb_buf += 8; // Advance 2 pixels. 633 } 634 if (width & 1) { 635 YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 0, 8, 16); 636 } 637 } 638 639 void I422ToRGBARow_C(const uint8* y_buf, 640 const uint8* u_buf, 641 const uint8* v_buf, 642 uint8* rgb_buf, 643 int width) { 644 for (int x = 0; x < width - 1; x += 2) { 645 YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 24, 16, 8); 646 YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 0, 24, 16, 8); 647 y_buf += 2; 648 u_buf += 1; 649 v_buf += 1; 650 rgb_buf += 8; // Advance 2 pixels. 651 } 652 if (width & 1) { 653 YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 24, 16, 8); 654 } 655 } 656 657 void YToARGBRow_C(const uint8* y_buf, uint8* rgb_buf, int width) { 658 for (int x = 0; x < width; ++x) { 659 YuvPixel(y_buf[0], 128, 128, rgb_buf, 24, 16, 8, 0); 660 y_buf += 1; 661 rgb_buf += 4; // Advance 1 pixel. 662 } 663 } 664 665 void MirrorRow_C(const uint8* src, uint8* dst, int width) { 666 src += width - 1; 667 for (int x = 0; x < width - 1; x += 2) { 668 dst[x] = src[0]; 669 dst[x + 1] = src[-1]; 670 src -= 2; 671 } 672 if (width & 1) { 673 dst[width - 1] = src[0]; 674 } 675 } 676 677 void MirrorRowUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { 678 src_uv += (width - 1) << 1; 679 for (int x = 0; x < width - 1; x += 2) { 680 dst_u[x] = src_uv[0]; 681 dst_u[x + 1] = src_uv[-2]; 682 dst_v[x] = src_uv[1]; 683 dst_v[x + 1] = src_uv[-2 + 1]; 684 src_uv -= 4; 685 } 686 if (width & 1) { 687 dst_u[width - 1] = src_uv[0]; 688 dst_v[width - 1] = src_uv[1]; 689 } 690 } 691 692 void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) { 693 const uint32* src32 = reinterpret_cast<const uint32*>(src); 694 uint32* dst32 = reinterpret_cast<uint32*>(dst); 695 src32 += width - 1; 696 for (int x = 0; x < width - 1; x += 2) { 697 dst32[x] = src32[0]; 698 dst32[x + 1] = src32[-1]; 699 src32 -= 2; 700 } 701 if (width & 1) { 702 dst32[width - 1] = src32[0]; 703 } 704 } 705 706 void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { 707 for (int x = 0; x < width - 1; x += 2) { 708 dst_u[x] = src_uv[0]; 709 dst_u[x + 1] = src_uv[2]; 710 dst_v[x] = src_uv[1]; 711 dst_v[x + 1] = src_uv[3]; 712 src_uv += 4; 713 } 714 if (width & 1) { 715 dst_u[width - 1] = src_uv[0]; 716 dst_v[width - 1] = src_uv[1]; 717 } 718 } 719 720 void CopyRow_C(const uint8* src, uint8* dst, int count) { 721 memcpy(dst, src, count); 722 } 723 724 void SetRow8_C(uint8* dst, uint32 v8, int count) { 725 #ifdef _MSC_VER 726 // VC will generate rep stosb. 727 for (int x = 0; x < count; ++x) { 728 dst[x] = v8; 729 } 730 #else 731 memset(dst, v8, count); 732 #endif 733 } 734 735 void SetRows32_C(uint8* dst, uint32 v32, int width, 736 int dst_stride, int height) { 737 for (int y = 0; y < height; ++y) { 738 uint32* d = reinterpret_cast<uint32*>(dst); 739 for (int x = 0; x < width; ++x) { 740 d[x] = v32; 741 } 742 dst += dst_stride; 743 } 744 } 745 746 // Filter 2 rows of YUY2 UV's (422) into U and V (420). 747 void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2, 748 uint8* dst_u, uint8* dst_v, int width) { 749 // Output a row of UV values, filtering 2 rows of YUY2. 750 for (int x = 0; x < width; x += 2) { 751 dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1; 752 dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1; 753 src_yuy2 += 4; 754 dst_u += 1; 755 dst_v += 1; 756 } 757 } 758 759 // Copy row of YUY2 UV's (422) into U and V (422). 760 void YUY2ToUV422Row_C(const uint8* src_yuy2, 761 uint8* dst_u, uint8* dst_v, int width) { 762 // Output a row of UV values. 763 for (int x = 0; x < width; x += 2) { 764 dst_u[0] = src_yuy2[1]; 765 dst_v[0] = src_yuy2[3]; 766 src_yuy2 += 4; 767 dst_u += 1; 768 dst_v += 1; 769 } 770 } 771 772 // Copy row of YUY2 Y's (422) into Y (420/422). 773 void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) { 774 // Output a row of Y values. 775 for (int x = 0; x < width - 1; x += 2) { 776 dst_y[x] = src_yuy2[0]; 777 dst_y[x + 1] = src_yuy2[2]; 778 src_yuy2 += 4; 779 } 780 if (width & 1) { 781 dst_y[width - 1] = src_yuy2[0]; 782 } 783 } 784 785 // Filter 2 rows of UYVY UV's (422) into U and V (420). 786 void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy, 787 uint8* dst_u, uint8* dst_v, int width) { 788 // Output a row of UV values. 789 for (int x = 0; x < width; x += 2) { 790 dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1; 791 dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1; 792 src_uyvy += 4; 793 dst_u += 1; 794 dst_v += 1; 795 } 796 } 797 798 // Copy row of UYVY UV's (422) into U and V (422). 799 void UYVYToUV422Row_C(const uint8* src_uyvy, 800 uint8* dst_u, uint8* dst_v, int width) { 801 // Output a row of UV values. 802 for (int x = 0; x < width; x += 2) { 803 dst_u[0] = src_uyvy[0]; 804 dst_v[0] = src_uyvy[2]; 805 src_uyvy += 4; 806 dst_u += 1; 807 dst_v += 1; 808 } 809 } 810 811 // Copy row of UYVY Y's (422) into Y (420/422). 812 void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) { 813 // Output a row of Y values. 814 for (int x = 0; x < width - 1; x += 2) { 815 dst_y[x] = src_uyvy[1]; 816 dst_y[x + 1] = src_uyvy[3]; 817 src_uyvy += 4; 818 } 819 if (width & 1) { 820 dst_y[width - 1] = src_uyvy[1]; 821 } 822 } 823 824 #define BLEND(f, b, a) (((256 - a) * b) >> 8) + f 825 826 // Blend src_argb0 over src_argb1 and store to dst_argb. 827 // dst_argb may be src_argb0 or src_argb1. 828 // This code mimics the SSSE3 version for better testability. 829 void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1, 830 uint8* dst_argb, int width) { 831 for (int x = 0; x < width - 1; x += 2) { 832 uint32 fb = src_argb0[0]; 833 uint32 fg = src_argb0[1]; 834 uint32 fr = src_argb0[2]; 835 uint32 a = src_argb0[3]; 836 uint32 bb = src_argb1[0]; 837 uint32 bg = src_argb1[1]; 838 uint32 br = src_argb1[2]; 839 dst_argb[0] = BLEND(fb, bb, a); 840 dst_argb[1] = BLEND(fg, bg, a); 841 dst_argb[2] = BLEND(fr, br, a); 842 dst_argb[3] = 255u; 843 844 fb = src_argb0[4 + 0]; 845 fg = src_argb0[4 + 1]; 846 fr = src_argb0[4 + 2]; 847 a = src_argb0[4 + 3]; 848 bb = src_argb1[4 + 0]; 849 bg = src_argb1[4 + 1]; 850 br = src_argb1[4 + 2]; 851 dst_argb[4 + 0] = BLEND(fb, bb, a); 852 dst_argb[4 + 1] = BLEND(fg, bg, a); 853 dst_argb[4 + 2] = BLEND(fr, br, a); 854 dst_argb[4 + 3] = 255u; 855 src_argb0 += 8; 856 src_argb1 += 8; 857 dst_argb += 8; 858 } 859 860 if (width & 1) { 861 uint32 fb = src_argb0[0]; 862 uint32 fg = src_argb0[1]; 863 uint32 fr = src_argb0[2]; 864 uint32 a = src_argb0[3]; 865 uint32 bb = src_argb1[0]; 866 uint32 bg = src_argb1[1]; 867 uint32 br = src_argb1[2]; 868 dst_argb[0] = BLEND(fb, bb, a); 869 dst_argb[1] = BLEND(fg, bg, a); 870 dst_argb[2] = BLEND(fr, br, a); 871 dst_argb[3] = 255u; 872 } 873 } 874 #undef BLEND 875 #define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24 876 877 // Multiply source RGB by alpha and store to destination. 878 // This code mimics the SSSE3 version for better testability. 879 void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { 880 for (int i = 0; i < width - 1; i += 2) { 881 uint32 b = src_argb[0]; 882 uint32 g = src_argb[1]; 883 uint32 r = src_argb[2]; 884 uint32 a = src_argb[3]; 885 dst_argb[0] = ATTENUATE(b, a); 886 dst_argb[1] = ATTENUATE(g, a); 887 dst_argb[2] = ATTENUATE(r, a); 888 dst_argb[3] = a; 889 b = src_argb[4]; 890 g = src_argb[5]; 891 r = src_argb[6]; 892 a = src_argb[7]; 893 dst_argb[4] = ATTENUATE(b, a); 894 dst_argb[5] = ATTENUATE(g, a); 895 dst_argb[6] = ATTENUATE(r, a); 896 dst_argb[7] = a; 897 src_argb += 8; 898 dst_argb += 8; 899 } 900 901 if (width & 1) { 902 const uint32 b = src_argb[0]; 903 const uint32 g = src_argb[1]; 904 const uint32 r = src_argb[2]; 905 const uint32 a = src_argb[3]; 906 dst_argb[0] = ATTENUATE(b, a); 907 dst_argb[1] = ATTENUATE(g, a); 908 dst_argb[2] = ATTENUATE(r, a); 909 dst_argb[3] = a; 910 } 911 } 912 #undef ATTENUATE 913 914 // Divide source RGB by alpha and store to destination. 915 // b = (b * 255 + (a / 2)) / a; 916 // g = (g * 255 + (a / 2)) / a; 917 // r = (r * 255 + (a / 2)) / a; 918 // Reciprocal method is off by 1 on some values. ie 125 919 // 8.16 fixed point inverse table 920 #define T(a) 0x10000 / a 921 uint32 fixed_invtbl8[256] = { 922 0x0100, T(0x01), T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07), 923 T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f), 924 T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17), 925 T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f), 926 T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), 927 T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), 928 T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37), 929 T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f), 930 T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47), 931 T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f), 932 T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57), 933 T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), 934 T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), 935 T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f), 936 T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77), 937 T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f), 938 T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87), 939 T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f), 940 T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), 941 T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), 942 T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7), 943 T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf), 944 T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7), 945 T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf), 946 T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7), 947 T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), 948 T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), 949 T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf), 950 T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7), 951 T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef), 952 T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7), 953 T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x0100 }; 954 #undef T 955 956 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { 957 for (int i = 0; i < width; ++i) { 958 uint32 b = src_argb[0]; 959 uint32 g = src_argb[1]; 960 uint32 r = src_argb[2]; 961 const uint32 a = src_argb[3]; 962 if (a) { 963 const uint32 ia = fixed_invtbl8[a]; // 8.16 fixed point 964 b = (b * ia) >> 8; 965 g = (g * ia) >> 8; 966 r = (r * ia) >> 8; 967 // Clamping should not be necessary but is free in assembly. 968 if (b > 255) { 969 b = 255; 970 } 971 if (g > 255) { 972 g = 255; 973 } 974 if (r > 255) { 975 r = 255; 976 } 977 } 978 dst_argb[0] = b; 979 dst_argb[1] = g; 980 dst_argb[2] = r; 981 dst_argb[3] = a; 982 src_argb += 4; 983 dst_argb += 4; 984 } 985 } 986 987 // Wrappers to handle odd width 988 #define YANY(NAMEANY, I420TORGB_SSE, I420TORGB_C, UV_SHIFT) \ 989 void NAMEANY(const uint8* y_buf, \ 990 const uint8* u_buf, \ 991 const uint8* v_buf, \ 992 uint8* rgb_buf, \ 993 int width) { \ 994 int n = width & ~7; \ 995 I420TORGB_SSE(y_buf, u_buf, v_buf, rgb_buf, n); \ 996 I420TORGB_C(y_buf + n, \ 997 u_buf + (n >> UV_SHIFT), \ 998 v_buf + (n >> UV_SHIFT), \ 999 rgb_buf + n * 4, width & 7); \ 1000 } 1001 1002 // Wrappers to handle odd width 1003 #define Y2NY(NAMEANY, NV12TORGB_SSE, NV12TORGB_C, UV_SHIFT) \ 1004 void NAMEANY(const uint8* y_buf, \ 1005 const uint8* uv_buf, \ 1006 uint8* rgb_buf, \ 1007 int width) { \ 1008 int n = width & ~7; \ 1009 NV12TORGB_SSE(y_buf, uv_buf, rgb_buf, n); \ 1010 NV12TORGB_C(y_buf + n, \ 1011 uv_buf + (n >> UV_SHIFT), \ 1012 rgb_buf + n * 4, width & 7); \ 1013 } 1014 1015 1016 #ifdef HAS_I422TOARGBROW_SSSE3 1017 YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C, 0) 1018 YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C, 1) 1019 YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C, 2) 1020 Y2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C, 0) 1021 Y2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C, 0) 1022 YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C, 1) 1023 YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C, 1) 1024 #endif 1025 #ifdef HAS_I422TORGB24ROW_SSSE3 1026 YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_Unaligned_SSSE3, \ 1027 I422ToRGB24Row_C, 1) 1028 YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_Unaligned_SSSE3, I422ToRAWRow_C, 1) 1029 #endif 1030 #ifdef HAS_I422TORGBAROW_SSSE3 1031 YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C, 1) 1032 #endif 1033 #ifdef HAS_I422TOARGBROW_NEON 1034 YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1) 1035 YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1) 1036 YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1) 1037 YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1) 1038 Y2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0) 1039 Y2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0) 1040 YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1) 1041 YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1) 1042 #endif 1043 #undef YANY 1044 1045 #define RGBANY(NAMEANY, ARGBTORGB, BPP) \ 1046 void NAMEANY(const uint8* argb_buf, \ 1047 uint8* rgb_buf, \ 1048 int width) { \ 1049 SIMD_ALIGNED(uint8 row[kMaxStride]); \ 1050 ARGBTORGB(argb_buf, row, width); \ 1051 memcpy(rgb_buf, row, width * BPP); \ 1052 } 1053 1054 #if defined(HAS_ARGBTORGB24ROW_SSSE3) 1055 RGBANY(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 3) 1056 RGBANY(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 3) 1057 RGBANY(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 2) 1058 RGBANY(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 2) 1059 RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 2) 1060 #endif 1061 #if defined(HAS_ARGBTORGB24ROW_NEON) 1062 RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 3) 1063 RGBANY(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 3) 1064 #endif 1065 #undef RGBANY 1066 1067 #define YANY(NAMEANY, ARGBTOY_SSE, BPP) \ 1068 void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \ 1069 ARGBTOY_SSE(src_argb, dst_y, width - 16); \ 1070 ARGBTOY_SSE(src_argb + (width - 16) * BPP, dst_y + (width - 16), 16); \ 1071 } 1072 1073 #ifdef HAS_ARGBTOYROW_SSSE3 1074 YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4) 1075 YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4) 1076 YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4) 1077 #endif 1078 #ifdef HAS_RGBATOYROW_SSSE3 1079 YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4) 1080 #endif 1081 #ifdef HAS_YUY2TOYROW_SSE2 1082 YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2) 1083 YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2) 1084 #endif 1085 #ifdef HAS_YUY2TOYROW_NEON 1086 YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2) 1087 YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2) 1088 #endif 1089 #undef YANY 1090 1091 #define UVANY(NAMEANY, ANYTOUV_SSE, ANYTOUV_C, BPP) \ 1092 void NAMEANY(const uint8* src_argb, int src_stride_argb, \ 1093 uint8* dst_u, uint8* dst_v, int width) { \ 1094 int n = width & ~15; \ 1095 ANYTOUV_SSE(src_argb, src_stride_argb, dst_u, dst_v, n); \ 1096 ANYTOUV_C(src_argb + n * BPP, src_stride_argb, \ 1097 dst_u + (n >> 1), \ 1098 dst_v + (n >> 1), \ 1099 width & 15); \ 1100 } 1101 1102 #ifdef HAS_ARGBTOUVROW_SSSE3 1103 UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4) 1104 UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4) 1105 UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4) 1106 #endif 1107 #ifdef HAS_RGBATOYROW_SSSE3 1108 UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4) 1109 #endif 1110 #ifdef HAS_YUY2TOUVROW_SSE2 1111 UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2) 1112 UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2) 1113 #endif 1114 #ifdef HAS_YUY2TOUVROW_NEON 1115 UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2) 1116 UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2) 1117 #endif 1118 #undef UVANY 1119 1120 #define UV422ANY(NAMEANY, ANYTOUV_SSE, ANYTOUV_C, BPP) \ 1121 void NAMEANY(const uint8* src_argb, \ 1122 uint8* dst_u, uint8* dst_v, int width) { \ 1123 int n = width & ~15; \ 1124 ANYTOUV_SSE(src_argb, dst_u, dst_v, n); \ 1125 ANYTOUV_C(src_argb + n * BPP, \ 1126 dst_u + (n >> 1), \ 1127 dst_v + (n >> 1), \ 1128 width & 15); \ 1129 } 1130 1131 #ifdef HAS_YUY2TOUV422ROW_SSE2 1132 UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2, \ 1133 YUY2ToUV422Row_C, 2) 1134 UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_Unaligned_SSE2, \ 1135 UYVYToUV422Row_C, 2) 1136 #endif 1137 #ifdef HAS_YUY2TOUV422ROW_NEON 1138 UV422ANY(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, \ 1139 YUY2ToUV422Row_C, 2) 1140 UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, \ 1141 UYVYToUV422Row_C, 2) 1142 #endif 1143 #undef UV422ANY 1144 1145 void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum, 1146 const int32* previous_cumsum, int width) { 1147 int32 row_sum[4] = {0, 0, 0, 0}; 1148 for (int x = 0; x < width; ++x) { 1149 row_sum[0] += row[x * 4 + 0]; 1150 row_sum[1] += row[x * 4 + 1]; 1151 row_sum[2] += row[x * 4 + 2]; 1152 row_sum[3] += row[x * 4 + 3]; 1153 cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0]; 1154 cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1]; 1155 cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2]; 1156 cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3]; 1157 } 1158 } 1159 1160 void CumulativeSumToAverage_C(const int32* tl, const int32* bl, 1161 int w, int area, uint8* dst, int count) { 1162 float ooa = 1.0f / area; 1163 for (int i = 0; i < count; ++i) { 1164 dst[0] = static_cast<uint8>((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa); 1165 dst[1] = static_cast<uint8>((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa); 1166 dst[2] = static_cast<uint8>((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa); 1167 dst[3] = static_cast<uint8>((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa); 1168 dst += 4; 1169 tl += 4; 1170 bl += 4; 1171 } 1172 } 1173 1174 #define REPEAT8(v) (v) | ((v) << 8) 1175 #define SHADE(f, v) v * f >> 24 1176 1177 void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width, 1178 uint32 value) { 1179 const uint32 b_scale = REPEAT8(value & 0xff); 1180 const uint32 g_scale = REPEAT8((value >> 8) & 0xff); 1181 const uint32 r_scale = REPEAT8((value >> 16) & 0xff); 1182 const uint32 a_scale = REPEAT8(value >> 24); 1183 1184 for (int i = 0; i < width; ++i) { 1185 const uint32 b = REPEAT8(src_argb[0]); 1186 const uint32 g = REPEAT8(src_argb[1]); 1187 const uint32 r = REPEAT8(src_argb[2]); 1188 const uint32 a = REPEAT8(src_argb[3]); 1189 dst_argb[0] = SHADE(b, b_scale); 1190 dst_argb[1] = SHADE(g, g_scale); 1191 dst_argb[2] = SHADE(r, r_scale); 1192 dst_argb[3] = SHADE(a, a_scale); 1193 src_argb += 4; 1194 dst_argb += 4; 1195 } 1196 } 1197 #undef REPEAT8 1198 #undef SHADE 1199 1200 // Copy pixels from rotated source to destination row with a slope. 1201 LIBYUV_API 1202 void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, 1203 uint8* dst_argb, const float* uv_dudv, int width) { 1204 // Render a row of pixels from source into a buffer. 1205 float uv[2]; 1206 uv[0] = uv_dudv[0]; 1207 uv[1] = uv_dudv[1]; 1208 for (int i = 0; i < width; ++i) { 1209 int x = static_cast<int>(uv[0]); 1210 int y = static_cast<int>(uv[1]); 1211 *reinterpret_cast<uint32*>(dst_argb) = 1212 *reinterpret_cast<const uint32*>(src_argb + y * src_argb_stride + 1213 x * 4); 1214 dst_argb += 4; 1215 uv[0] += uv_dudv[2]; 1216 uv[1] += uv_dudv[3]; 1217 } 1218 } 1219 1220 // C version 2x2 -> 2x1. 1221 void ARGBInterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, 1222 ptrdiff_t src_stride, 1223 int dst_width, int source_y_fraction) { 1224 int y1_fraction = source_y_fraction; 1225 int y0_fraction = 256 - y1_fraction; 1226 const uint8* src_ptr1 = src_ptr + src_stride; 1227 uint8* end = dst_ptr + (dst_width << 2); 1228 do { 1229 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; 1230 dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8; 1231 dst_ptr[2] = (src_ptr[2] * y0_fraction + src_ptr1[2] * y1_fraction) >> 8; 1232 dst_ptr[3] = (src_ptr[3] * y0_fraction + src_ptr1[3] * y1_fraction) >> 8; 1233 dst_ptr[4] = (src_ptr[4] * y0_fraction + src_ptr1[4] * y1_fraction) >> 8; 1234 dst_ptr[5] = (src_ptr[5] * y0_fraction + src_ptr1[5] * y1_fraction) >> 8; 1235 dst_ptr[6] = (src_ptr[6] * y0_fraction + src_ptr1[6] * y1_fraction) >> 8; 1236 dst_ptr[7] = (src_ptr[7] * y0_fraction + src_ptr1[7] * y1_fraction) >> 8; 1237 src_ptr += 8; 1238 src_ptr1 += 8; 1239 dst_ptr += 8; 1240 } while (dst_ptr < end); 1241 } 1242 1243 #ifdef __cplusplus 1244 } // extern "C" 1245 } // namespace libyuv 1246 #endif 1247