1 /* 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "libyuv/row.h" 12 13 #include <string.h> // For memcpy and memset. 14 15 #include "libyuv/basic_types.h" 16 17 #ifdef __cplusplus 18 namespace libyuv { 19 extern "C" { 20 #endif 21 22 // llvm x86 is poor at ternary operator, so use branchless min/max. 23 24 #define USE_BRANCHLESS 1 25 #if USE_BRANCHLESS 26 static __inline int32 clamp0(int32 v) { 27 return ((-(v) >> 31) & (v)); 28 } 29 30 static __inline int32 clamp255(int32 v) { 31 return (((255 - (v)) >> 31) | (v)) & 255; 32 } 33 34 static __inline uint32 Clamp(int32 val) { 35 int v = clamp0(val); 36 return (uint32)(clamp255(v)); 37 } 38 39 static __inline uint32 Abs(int32 v) { 40 int m = v >> 31; 41 return (v + m) ^ m; 42 } 43 #else // USE_BRANCHLESS 44 static __inline int32 clamp0(int32 v) { 45 return (v < 0) ? 0 : v; 46 } 47 48 static __inline int32 clamp255(int32 v) { 49 return (v > 255) ? 255 : v; 50 } 51 52 static __inline uint32 Clamp(int32 val) { 53 int v = clamp0(val); 54 return (uint32)(clamp255(v)); 55 } 56 57 static __inline uint32 Abs(int32 v) { 58 return (v < 0) ? 
-v : v; 59 } 60 #endif // USE_BRANCHLESS 61 62 #ifdef LIBYUV_LITTLE_ENDIAN 63 #define WRITEWORD(p, v) *(uint32*)(p) = v 64 #else 65 static inline void WRITEWORD(uint8* p, uint32 v) { 66 p[0] = (uint8)(v & 255); 67 p[1] = (uint8)((v >> 8) & 255); 68 p[2] = (uint8)((v >> 16) & 255); 69 p[3] = (uint8)((v >> 24) & 255); 70 } 71 #endif 72 73 void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) { 74 int x; 75 for (x = 0; x < width; ++x) { 76 uint8 b = src_rgb24[0]; 77 uint8 g = src_rgb24[1]; 78 uint8 r = src_rgb24[2]; 79 dst_argb[0] = b; 80 dst_argb[1] = g; 81 dst_argb[2] = r; 82 dst_argb[3] = 255u; 83 dst_argb += 4; 84 src_rgb24 += 3; 85 } 86 } 87 88 void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) { 89 int x; 90 for (x = 0; x < width; ++x) { 91 uint8 r = src_raw[0]; 92 uint8 g = src_raw[1]; 93 uint8 b = src_raw[2]; 94 dst_argb[0] = b; 95 dst_argb[1] = g; 96 dst_argb[2] = r; 97 dst_argb[3] = 255u; 98 dst_argb += 4; 99 src_raw += 3; 100 } 101 } 102 103 void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width) { 104 int x; 105 for (x = 0; x < width; ++x) { 106 uint8 r = src_raw[0]; 107 uint8 g = src_raw[1]; 108 uint8 b = src_raw[2]; 109 dst_rgb24[0] = b; 110 dst_rgb24[1] = g; 111 dst_rgb24[2] = r; 112 dst_rgb24 += 3; 113 src_raw += 3; 114 } 115 } 116 117 void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) { 118 int x; 119 for (x = 0; x < width; ++x) { 120 uint8 b = src_rgb565[0] & 0x1f; 121 uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); 122 uint8 r = src_rgb565[1] >> 3; 123 dst_argb[0] = (b << 3) | (b >> 2); 124 dst_argb[1] = (g << 2) | (g >> 4); 125 dst_argb[2] = (r << 3) | (r >> 2); 126 dst_argb[3] = 255u; 127 dst_argb += 4; 128 src_rgb565 += 2; 129 } 130 } 131 132 void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb, 133 int width) { 134 int x; 135 for (x = 0; x < width; ++x) { 136 uint8 b = src_argb1555[0] & 0x1f; 137 uint8 g = (src_argb1555[0] >> 5) | 
((src_argb1555[1] & 0x03) << 3); 138 uint8 r = (src_argb1555[1] & 0x7c) >> 2; 139 uint8 a = src_argb1555[1] >> 7; 140 dst_argb[0] = (b << 3) | (b >> 2); 141 dst_argb[1] = (g << 3) | (g >> 2); 142 dst_argb[2] = (r << 3) | (r >> 2); 143 dst_argb[3] = -a; 144 dst_argb += 4; 145 src_argb1555 += 2; 146 } 147 } 148 149 void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb, 150 int width) { 151 int x; 152 for (x = 0; x < width; ++x) { 153 uint8 b = src_argb4444[0] & 0x0f; 154 uint8 g = src_argb4444[0] >> 4; 155 uint8 r = src_argb4444[1] & 0x0f; 156 uint8 a = src_argb4444[1] >> 4; 157 dst_argb[0] = (b << 4) | b; 158 dst_argb[1] = (g << 4) | g; 159 dst_argb[2] = (r << 4) | r; 160 dst_argb[3] = (a << 4) | a; 161 dst_argb += 4; 162 src_argb4444 += 2; 163 } 164 } 165 166 void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 167 int x; 168 for (x = 0; x < width; ++x) { 169 uint8 b = src_argb[0]; 170 uint8 g = src_argb[1]; 171 uint8 r = src_argb[2]; 172 dst_rgb[0] = b; 173 dst_rgb[1] = g; 174 dst_rgb[2] = r; 175 dst_rgb += 3; 176 src_argb += 4; 177 } 178 } 179 180 void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) { 181 int x; 182 for (x = 0; x < width; ++x) { 183 uint8 b = src_argb[0]; 184 uint8 g = src_argb[1]; 185 uint8 r = src_argb[2]; 186 dst_rgb[0] = r; 187 dst_rgb[1] = g; 188 dst_rgb[2] = b; 189 dst_rgb += 3; 190 src_argb += 4; 191 } 192 } 193 194 void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 195 int x; 196 for (x = 0; x < width - 1; x += 2) { 197 uint8 b0 = src_argb[0] >> 3; 198 uint8 g0 = src_argb[1] >> 2; 199 uint8 r0 = src_argb[2] >> 3; 200 uint8 b1 = src_argb[4] >> 3; 201 uint8 g1 = src_argb[5] >> 2; 202 uint8 r1 = src_argb[6] >> 3; 203 WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | 204 (b1 << 16) | (g1 << 21) | (r1 << 27)); 205 dst_rgb += 4; 206 src_argb += 8; 207 } 208 if (width & 1) { 209 uint8 b0 = src_argb[0] >> 3; 210 uint8 g0 = src_argb[1] >> 2; 211 uint8 r0 = src_argb[2] >> 3; 
212 *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); 213 } 214 } 215 216 // dither4 is a row of 4 values from 4x4 dither matrix. 217 // The 4x4 matrix contains values to increase RGB. When converting to 218 // fewer bits (565) this provides an ordered dither. 219 // The order in the 4x4 matrix in first byte is upper left. 220 // The 4 values are passed as an int, then referenced as an array, so 221 // endian will not affect order of the original matrix. But the dither4 222 // will containing the first pixel in the lower byte for little endian 223 // or the upper byte for big endian. 224 void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb, 225 const uint32 dither4, int width) { 226 int x; 227 for (x = 0; x < width - 1; x += 2) { 228 int dither0 = ((const unsigned char*)(&dither4))[x & 3]; 229 int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3]; 230 uint8 b0 = clamp255(src_argb[0] + dither0) >> 3; 231 uint8 g0 = clamp255(src_argb[1] + dither0) >> 2; 232 uint8 r0 = clamp255(src_argb[2] + dither0) >> 3; 233 uint8 b1 = clamp255(src_argb[4] + dither1) >> 3; 234 uint8 g1 = clamp255(src_argb[5] + dither1) >> 2; 235 uint8 r1 = clamp255(src_argb[6] + dither1) >> 3; 236 WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | 237 (b1 << 16) | (g1 << 21) | (r1 << 27)); 238 dst_rgb += 4; 239 src_argb += 8; 240 } 241 if (width & 1) { 242 int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3]; 243 uint8 b0 = clamp255(src_argb[0] + dither0) >> 3; 244 uint8 g0 = clamp255(src_argb[1] + dither0) >> 2; 245 uint8 r0 = clamp255(src_argb[2] + dither0) >> 3; 246 *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); 247 } 248 } 249 250 void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 251 int x; 252 for (x = 0; x < width - 1; x += 2) { 253 uint8 b0 = src_argb[0] >> 3; 254 uint8 g0 = src_argb[1] >> 3; 255 uint8 r0 = src_argb[2] >> 3; 256 uint8 a0 = src_argb[3] >> 7; 257 uint8 b1 = src_argb[4] >> 3; 258 uint8 g1 = src_argb[5] >> 3; 259 
uint8 r1 = src_argb[6] >> 3; 260 uint8 a1 = src_argb[7] >> 7; 261 *(uint32*)(dst_rgb) = 262 b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) | 263 (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31); 264 dst_rgb += 4; 265 src_argb += 8; 266 } 267 if (width & 1) { 268 uint8 b0 = src_argb[0] >> 3; 269 uint8 g0 = src_argb[1] >> 3; 270 uint8 r0 = src_argb[2] >> 3; 271 uint8 a0 = src_argb[3] >> 7; 272 *(uint16*)(dst_rgb) = 273 b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); 274 } 275 } 276 277 void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 278 int x; 279 for (x = 0; x < width - 1; x += 2) { 280 uint8 b0 = src_argb[0] >> 4; 281 uint8 g0 = src_argb[1] >> 4; 282 uint8 r0 = src_argb[2] >> 4; 283 uint8 a0 = src_argb[3] >> 4; 284 uint8 b1 = src_argb[4] >> 4; 285 uint8 g1 = src_argb[5] >> 4; 286 uint8 r1 = src_argb[6] >> 4; 287 uint8 a1 = src_argb[7] >> 4; 288 *(uint32*)(dst_rgb) = 289 b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) | 290 (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28); 291 dst_rgb += 4; 292 src_argb += 8; 293 } 294 if (width & 1) { 295 uint8 b0 = src_argb[0] >> 4; 296 uint8 g0 = src_argb[1] >> 4; 297 uint8 r0 = src_argb[2] >> 4; 298 uint8 a0 = src_argb[3] >> 4; 299 *(uint16*)(dst_rgb) = 300 b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); 301 } 302 } 303 304 static __inline int RGBToY(uint8 r, uint8 g, uint8 b) { 305 return (66 * r + 129 * g + 25 * b + 0x1080) >> 8; 306 } 307 308 static __inline int RGBToU(uint8 r, uint8 g, uint8 b) { 309 return (112 * b - 74 * g - 38 * r + 0x8080) >> 8; 310 } 311 static __inline int RGBToV(uint8 r, uint8 g, uint8 b) { 312 return (112 * r - 94 * g - 18 * b + 0x8080) >> 8; 313 } 314 315 #define MAKEROWY(NAME, R, G, B, BPP) \ 316 void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ 317 int x; \ 318 for (x = 0; x < width; ++x) { \ 319 dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \ 320 src_argb0 += BPP; \ 321 dst_y += 1; \ 322 } \ 323 } \ 324 void NAME ## ToUVRow_C(const uint8* src_rgb0, int 
src_stride_rgb, \ 325 uint8* dst_u, uint8* dst_v, int width) { \ 326 const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ 327 int x; \ 328 for (x = 0; x < width - 1; x += 2) { \ 329 uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + \ 330 src_rgb1[B] + src_rgb1[B + BPP]) >> 2; \ 331 uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + \ 332 src_rgb1[G] + src_rgb1[G + BPP]) >> 2; \ 333 uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + \ 334 src_rgb1[R] + src_rgb1[R + BPP]) >> 2; \ 335 dst_u[0] = RGBToU(ar, ag, ab); \ 336 dst_v[0] = RGBToV(ar, ag, ab); \ 337 src_rgb0 += BPP * 2; \ 338 src_rgb1 += BPP * 2; \ 339 dst_u += 1; \ 340 dst_v += 1; \ 341 } \ 342 if (width & 1) { \ 343 uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \ 344 uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \ 345 uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \ 346 dst_u[0] = RGBToU(ar, ag, ab); \ 347 dst_v[0] = RGBToV(ar, ag, ab); \ 348 } \ 349 } 350 351 MAKEROWY(ARGB, 2, 1, 0, 4) 352 MAKEROWY(BGRA, 1, 2, 3, 4) 353 MAKEROWY(ABGR, 0, 1, 2, 4) 354 MAKEROWY(RGBA, 3, 2, 1, 4) 355 MAKEROWY(RGB24, 2, 1, 0, 3) 356 MAKEROWY(RAW, 0, 1, 2, 3) 357 #undef MAKEROWY 358 359 // JPeg uses a variation on BT.601-1 full range 360 // y = 0.29900 * r + 0.58700 * g + 0.11400 * b 361 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center 362 // v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center 363 // BT.601 Mpeg range uses: 364 // b 0.1016 * 255 = 25.908 = 25 365 // g 0.5078 * 255 = 129.489 = 129 366 // r 0.2578 * 255 = 65.739 = 66 367 // JPeg 8 bit Y (not used): 368 // b 0.11400 * 256 = 29.184 = 29 369 // g 0.58700 * 256 = 150.272 = 150 370 // r 0.29900 * 256 = 76.544 = 77 371 // JPeg 7 bit Y: 372 // b 0.11400 * 128 = 14.592 = 15 373 // g 0.58700 * 128 = 75.136 = 75 374 // r 0.29900 * 128 = 38.272 = 38 375 // JPeg 8 bit U: 376 // b 0.50000 * 255 = 127.5 = 127 377 // g -0.33126 * 255 = -84.4713 = -84 378 // r -0.16874 * 255 = -43.0287 = -43 379 // JPeg 8 bit V: 380 // b -0.08131 * 255 = -20.73405 = -20 381 // g -0.41869 * 255 = 
-106.76595 = -107 382 // r 0.50000 * 255 = 127.5 = 127 383 384 static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) { 385 return (38 * r + 75 * g + 15 * b + 64) >> 7; 386 } 387 388 static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) { 389 return (127 * b - 84 * g - 43 * r + 0x8080) >> 8; 390 } 391 static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) { 392 return (127 * r - 107 * g - 20 * b + 0x8080) >> 8; 393 } 394 395 #define AVGB(a, b) (((a) + (b) + 1) >> 1) 396 397 #define MAKEROWYJ(NAME, R, G, B, BPP) \ 398 void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ 399 int x; \ 400 for (x = 0; x < width; ++x) { \ 401 dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \ 402 src_argb0 += BPP; \ 403 dst_y += 1; \ 404 } \ 405 } \ 406 void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \ 407 uint8* dst_u, uint8* dst_v, int width) { \ 408 const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ 409 int x; \ 410 for (x = 0; x < width - 1; x += 2) { \ 411 uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \ 412 AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \ 413 uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \ 414 AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \ 415 uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \ 416 AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \ 417 dst_u[0] = RGBToUJ(ar, ag, ab); \ 418 dst_v[0] = RGBToVJ(ar, ag, ab); \ 419 src_rgb0 += BPP * 2; \ 420 src_rgb1 += BPP * 2; \ 421 dst_u += 1; \ 422 dst_v += 1; \ 423 } \ 424 if (width & 1) { \ 425 uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]); \ 426 uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]); \ 427 uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]); \ 428 dst_u[0] = RGBToUJ(ar, ag, ab); \ 429 dst_v[0] = RGBToVJ(ar, ag, ab); \ 430 } \ 431 } 432 433 MAKEROWYJ(ARGB, 2, 1, 0, 4) 434 #undef MAKEROWYJ 435 436 void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) { 437 int x; 438 for (x = 0; x < width; ++x) { 439 uint8 b = src_rgb565[0] & 0x1f; 440 uint8 g = 
(src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); 441 uint8 r = src_rgb565[1] >> 3; 442 b = (b << 3) | (b >> 2); 443 g = (g << 2) | (g >> 4); 444 r = (r << 3) | (r >> 2); 445 dst_y[0] = RGBToY(r, g, b); 446 src_rgb565 += 2; 447 dst_y += 1; 448 } 449 } 450 451 void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) { 452 int x; 453 for (x = 0; x < width; ++x) { 454 uint8 b = src_argb1555[0] & 0x1f; 455 uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); 456 uint8 r = (src_argb1555[1] & 0x7c) >> 2; 457 b = (b << 3) | (b >> 2); 458 g = (g << 3) | (g >> 2); 459 r = (r << 3) | (r >> 2); 460 dst_y[0] = RGBToY(r, g, b); 461 src_argb1555 += 2; 462 dst_y += 1; 463 } 464 } 465 466 void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) { 467 int x; 468 for (x = 0; x < width; ++x) { 469 uint8 b = src_argb4444[0] & 0x0f; 470 uint8 g = src_argb4444[0] >> 4; 471 uint8 r = src_argb4444[1] & 0x0f; 472 b = (b << 4) | b; 473 g = (g << 4) | g; 474 r = (r << 4) | r; 475 dst_y[0] = RGBToY(r, g, b); 476 src_argb4444 += 2; 477 dst_y += 1; 478 } 479 } 480 481 void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565, 482 uint8* dst_u, uint8* dst_v, int width) { 483 const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565; 484 int x; 485 for (x = 0; x < width - 1; x += 2) { 486 uint8 b0 = src_rgb565[0] & 0x1f; 487 uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); 488 uint8 r0 = src_rgb565[1] >> 3; 489 uint8 b1 = src_rgb565[2] & 0x1f; 490 uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3); 491 uint8 r1 = src_rgb565[3] >> 3; 492 uint8 b2 = next_rgb565[0] & 0x1f; 493 uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); 494 uint8 r2 = next_rgb565[1] >> 3; 495 uint8 b3 = next_rgb565[2] & 0x1f; 496 uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3); 497 uint8 r3 = next_rgb565[3] >> 3; 498 uint8 b = (b0 + b1 + b2 + b3); // 565 * 4 = 787. 
499 uint8 g = (g0 + g1 + g2 + g3); 500 uint8 r = (r0 + r1 + r2 + r3); 501 b = (b << 1) | (b >> 6); // 787 -> 888. 502 r = (r << 1) | (r >> 6); 503 dst_u[0] = RGBToU(r, g, b); 504 dst_v[0] = RGBToV(r, g, b); 505 src_rgb565 += 4; 506 next_rgb565 += 4; 507 dst_u += 1; 508 dst_v += 1; 509 } 510 if (width & 1) { 511 uint8 b0 = src_rgb565[0] & 0x1f; 512 uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); 513 uint8 r0 = src_rgb565[1] >> 3; 514 uint8 b2 = next_rgb565[0] & 0x1f; 515 uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); 516 uint8 r2 = next_rgb565[1] >> 3; 517 uint8 b = (b0 + b2); // 565 * 2 = 676. 518 uint8 g = (g0 + g2); 519 uint8 r = (r0 + r2); 520 b = (b << 2) | (b >> 4); // 676 -> 888 521 g = (g << 1) | (g >> 6); 522 r = (r << 2) | (r >> 4); 523 dst_u[0] = RGBToU(r, g, b); 524 dst_v[0] = RGBToV(r, g, b); 525 } 526 } 527 528 void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555, 529 uint8* dst_u, uint8* dst_v, int width) { 530 const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555; 531 int x; 532 for (x = 0; x < width - 1; x += 2) { 533 uint8 b0 = src_argb1555[0] & 0x1f; 534 uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); 535 uint8 r0 = (src_argb1555[1] & 0x7c) >> 2; 536 uint8 b1 = src_argb1555[2] & 0x1f; 537 uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3); 538 uint8 r1 = (src_argb1555[3] & 0x7c) >> 2; 539 uint8 b2 = next_argb1555[0] & 0x1f; 540 uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); 541 uint8 r2 = (next_argb1555[1] & 0x7c) >> 2; 542 uint8 b3 = next_argb1555[2] & 0x1f; 543 uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3); 544 uint8 r3 = (next_argb1555[3] & 0x7c) >> 2; 545 uint8 b = (b0 + b1 + b2 + b3); // 555 * 4 = 777. 546 uint8 g = (g0 + g1 + g2 + g3); 547 uint8 r = (r0 + r1 + r2 + r3); 548 b = (b << 1) | (b >> 6); // 777 -> 888. 
549 g = (g << 1) | (g >> 6); 550 r = (r << 1) | (r >> 6); 551 dst_u[0] = RGBToU(r, g, b); 552 dst_v[0] = RGBToV(r, g, b); 553 src_argb1555 += 4; 554 next_argb1555 += 4; 555 dst_u += 1; 556 dst_v += 1; 557 } 558 if (width & 1) { 559 uint8 b0 = src_argb1555[0] & 0x1f; 560 uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); 561 uint8 r0 = (src_argb1555[1] & 0x7c) >> 2; 562 uint8 b2 = next_argb1555[0] & 0x1f; 563 uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); 564 uint8 r2 = next_argb1555[1] >> 3; 565 uint8 b = (b0 + b2); // 555 * 2 = 666. 566 uint8 g = (g0 + g2); 567 uint8 r = (r0 + r2); 568 b = (b << 2) | (b >> 4); // 666 -> 888. 569 g = (g << 2) | (g >> 4); 570 r = (r << 2) | (r >> 4); 571 dst_u[0] = RGBToU(r, g, b); 572 dst_v[0] = RGBToV(r, g, b); 573 } 574 } 575 576 void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444, 577 uint8* dst_u, uint8* dst_v, int width) { 578 const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444; 579 int x; 580 for (x = 0; x < width - 1; x += 2) { 581 uint8 b0 = src_argb4444[0] & 0x0f; 582 uint8 g0 = src_argb4444[0] >> 4; 583 uint8 r0 = src_argb4444[1] & 0x0f; 584 uint8 b1 = src_argb4444[2] & 0x0f; 585 uint8 g1 = src_argb4444[2] >> 4; 586 uint8 r1 = src_argb4444[3] & 0x0f; 587 uint8 b2 = next_argb4444[0] & 0x0f; 588 uint8 g2 = next_argb4444[0] >> 4; 589 uint8 r2 = next_argb4444[1] & 0x0f; 590 uint8 b3 = next_argb4444[2] & 0x0f; 591 uint8 g3 = next_argb4444[2] >> 4; 592 uint8 r3 = next_argb4444[3] & 0x0f; 593 uint8 b = (b0 + b1 + b2 + b3); // 444 * 4 = 666. 594 uint8 g = (g0 + g1 + g2 + g3); 595 uint8 r = (r0 + r1 + r2 + r3); 596 b = (b << 2) | (b >> 4); // 666 -> 888. 
597 g = (g << 2) | (g >> 4); 598 r = (r << 2) | (r >> 4); 599 dst_u[0] = RGBToU(r, g, b); 600 dst_v[0] = RGBToV(r, g, b); 601 src_argb4444 += 4; 602 next_argb4444 += 4; 603 dst_u += 1; 604 dst_v += 1; 605 } 606 if (width & 1) { 607 uint8 b0 = src_argb4444[0] & 0x0f; 608 uint8 g0 = src_argb4444[0] >> 4; 609 uint8 r0 = src_argb4444[1] & 0x0f; 610 uint8 b2 = next_argb4444[0] & 0x0f; 611 uint8 g2 = next_argb4444[0] >> 4; 612 uint8 r2 = next_argb4444[1] & 0x0f; 613 uint8 b = (b0 + b2); // 444 * 2 = 555. 614 uint8 g = (g0 + g2); 615 uint8 r = (r0 + r2); 616 b = (b << 3) | (b >> 2); // 555 -> 888. 617 g = (g << 3) | (g >> 2); 618 r = (r << 3) | (r >> 2); 619 dst_u[0] = RGBToU(r, g, b); 620 dst_v[0] = RGBToV(r, g, b); 621 } 622 } 623 624 void ARGBToUV444Row_C(const uint8* src_argb, 625 uint8* dst_u, uint8* dst_v, int width) { 626 int x; 627 for (x = 0; x < width; ++x) { 628 uint8 ab = src_argb[0]; 629 uint8 ag = src_argb[1]; 630 uint8 ar = src_argb[2]; 631 dst_u[0] = RGBToU(ar, ag, ab); 632 dst_v[0] = RGBToV(ar, ag, ab); 633 src_argb += 4; 634 dst_u += 1; 635 dst_v += 1; 636 } 637 } 638 639 void ARGBToUV411Row_C(const uint8* src_argb, 640 uint8* dst_u, uint8* dst_v, int width) { 641 int x; 642 for (x = 0; x < width - 3; x += 4) { 643 uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2; 644 uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2; 645 uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2; 646 dst_u[0] = RGBToU(ar, ag, ab); 647 dst_v[0] = RGBToV(ar, ag, ab); 648 src_argb += 16; 649 dst_u += 1; 650 dst_v += 1; 651 } 652 // Odd width handling mimics 'any' function which replicates last pixel. 
653 if ((width & 3) == 3) { 654 uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[8]) >> 2; 655 uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[9]) >> 2; 656 uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[10]) >> 2; 657 dst_u[0] = RGBToU(ar, ag, ab); 658 dst_v[0] = RGBToV(ar, ag, ab); 659 } else if ((width & 3) == 2) { 660 uint8 ab = (src_argb[0] + src_argb[4]) >> 1; 661 uint8 ag = (src_argb[1] + src_argb[5]) >> 1; 662 uint8 ar = (src_argb[2] + src_argb[6]) >> 1; 663 dst_u[0] = RGBToU(ar, ag, ab); 664 dst_v[0] = RGBToV(ar, ag, ab); 665 } else if ((width & 3) == 1) { 666 uint8 ab = src_argb[0]; 667 uint8 ag = src_argb[1]; 668 uint8 ar = src_argb[2]; 669 dst_u[0] = RGBToU(ar, ag, ab); 670 dst_v[0] = RGBToV(ar, ag, ab); 671 } 672 } 673 674 void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) { 675 int x; 676 for (x = 0; x < width; ++x) { 677 uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]); 678 dst_argb[2] = dst_argb[1] = dst_argb[0] = y; 679 dst_argb[3] = src_argb[3]; 680 dst_argb += 4; 681 src_argb += 4; 682 } 683 } 684 685 // Convert a row of image to Sepia tone. 686 void ARGBSepiaRow_C(uint8* dst_argb, int width) { 687 int x; 688 for (x = 0; x < width; ++x) { 689 int b = dst_argb[0]; 690 int g = dst_argb[1]; 691 int r = dst_argb[2]; 692 int sb = (b * 17 + g * 68 + r * 35) >> 7; 693 int sg = (b * 22 + g * 88 + r * 45) >> 7; 694 int sr = (b * 24 + g * 98 + r * 50) >> 7; 695 // b does not over flow. a is preserved from original. 696 dst_argb[0] = sb; 697 dst_argb[1] = clamp255(sg); 698 dst_argb[2] = clamp255(sr); 699 dst_argb += 4; 700 } 701 } 702 703 // Apply color matrix to a row of image. Matrix is signed. 704 // TODO(fbarchard): Consider adding rounding (+32). 
705 void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb, 706 const int8* matrix_argb, int width) { 707 int x; 708 for (x = 0; x < width; ++x) { 709 int b = src_argb[0]; 710 int g = src_argb[1]; 711 int r = src_argb[2]; 712 int a = src_argb[3]; 713 int sb = (b * matrix_argb[0] + g * matrix_argb[1] + 714 r * matrix_argb[2] + a * matrix_argb[3]) >> 6; 715 int sg = (b * matrix_argb[4] + g * matrix_argb[5] + 716 r * matrix_argb[6] + a * matrix_argb[7]) >> 6; 717 int sr = (b * matrix_argb[8] + g * matrix_argb[9] + 718 r * matrix_argb[10] + a * matrix_argb[11]) >> 6; 719 int sa = (b * matrix_argb[12] + g * matrix_argb[13] + 720 r * matrix_argb[14] + a * matrix_argb[15]) >> 6; 721 dst_argb[0] = Clamp(sb); 722 dst_argb[1] = Clamp(sg); 723 dst_argb[2] = Clamp(sr); 724 dst_argb[3] = Clamp(sa); 725 src_argb += 4; 726 dst_argb += 4; 727 } 728 } 729 730 // Apply color table to a row of image. 731 void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { 732 int x; 733 for (x = 0; x < width; ++x) { 734 int b = dst_argb[0]; 735 int g = dst_argb[1]; 736 int r = dst_argb[2]; 737 int a = dst_argb[3]; 738 dst_argb[0] = table_argb[b * 4 + 0]; 739 dst_argb[1] = table_argb[g * 4 + 1]; 740 dst_argb[2] = table_argb[r * 4 + 2]; 741 dst_argb[3] = table_argb[a * 4 + 3]; 742 dst_argb += 4; 743 } 744 } 745 746 // Apply color table to a row of image. 
747 void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { 748 int x; 749 for (x = 0; x < width; ++x) { 750 int b = dst_argb[0]; 751 int g = dst_argb[1]; 752 int r = dst_argb[2]; 753 dst_argb[0] = table_argb[b * 4 + 0]; 754 dst_argb[1] = table_argb[g * 4 + 1]; 755 dst_argb[2] = table_argb[r * 4 + 2]; 756 dst_argb += 4; 757 } 758 } 759 760 void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size, 761 int interval_offset, int width) { 762 int x; 763 for (x = 0; x < width; ++x) { 764 int b = dst_argb[0]; 765 int g = dst_argb[1]; 766 int r = dst_argb[2]; 767 dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; 768 dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset; 769 dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset; 770 dst_argb += 4; 771 } 772 } 773 774 #define REPEAT8(v) (v) | ((v) << 8) 775 #define SHADE(f, v) v * f >> 24 776 777 void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width, 778 uint32 value) { 779 const uint32 b_scale = REPEAT8(value & 0xff); 780 const uint32 g_scale = REPEAT8((value >> 8) & 0xff); 781 const uint32 r_scale = REPEAT8((value >> 16) & 0xff); 782 const uint32 a_scale = REPEAT8(value >> 24); 783 784 int i; 785 for (i = 0; i < width; ++i) { 786 const uint32 b = REPEAT8(src_argb[0]); 787 const uint32 g = REPEAT8(src_argb[1]); 788 const uint32 r = REPEAT8(src_argb[2]); 789 const uint32 a = REPEAT8(src_argb[3]); 790 dst_argb[0] = SHADE(b, b_scale); 791 dst_argb[1] = SHADE(g, g_scale); 792 dst_argb[2] = SHADE(r, r_scale); 793 dst_argb[3] = SHADE(a, a_scale); 794 src_argb += 4; 795 dst_argb += 4; 796 } 797 } 798 #undef REPEAT8 799 #undef SHADE 800 801 #define REPEAT8(v) (v) | ((v) << 8) 802 #define SHADE(f, v) v * f >> 16 803 804 void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1, 805 uint8* dst_argb, int width) { 806 int i; 807 for (i = 0; i < width; ++i) { 808 const uint32 b = REPEAT8(src_argb0[0]); 809 const uint32 g = 
REPEAT8(src_argb0[1]); 810 const uint32 r = REPEAT8(src_argb0[2]); 811 const uint32 a = REPEAT8(src_argb0[3]); 812 const uint32 b_scale = src_argb1[0]; 813 const uint32 g_scale = src_argb1[1]; 814 const uint32 r_scale = src_argb1[2]; 815 const uint32 a_scale = src_argb1[3]; 816 dst_argb[0] = SHADE(b, b_scale); 817 dst_argb[1] = SHADE(g, g_scale); 818 dst_argb[2] = SHADE(r, r_scale); 819 dst_argb[3] = SHADE(a, a_scale); 820 src_argb0 += 4; 821 src_argb1 += 4; 822 dst_argb += 4; 823 } 824 } 825 #undef REPEAT8 826 #undef SHADE 827 828 #define SHADE(f, v) clamp255(v + f) 829 830 void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1, 831 uint8* dst_argb, int width) { 832 int i; 833 for (i = 0; i < width; ++i) { 834 const int b = src_argb0[0]; 835 const int g = src_argb0[1]; 836 const int r = src_argb0[2]; 837 const int a = src_argb0[3]; 838 const int b_add = src_argb1[0]; 839 const int g_add = src_argb1[1]; 840 const int r_add = src_argb1[2]; 841 const int a_add = src_argb1[3]; 842 dst_argb[0] = SHADE(b, b_add); 843 dst_argb[1] = SHADE(g, g_add); 844 dst_argb[2] = SHADE(r, r_add); 845 dst_argb[3] = SHADE(a, a_add); 846 src_argb0 += 4; 847 src_argb1 += 4; 848 dst_argb += 4; 849 } 850 } 851 #undef SHADE 852 853 #define SHADE(f, v) clamp0(f - v) 854 855 void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1, 856 uint8* dst_argb, int width) { 857 int i; 858 for (i = 0; i < width; ++i) { 859 const int b = src_argb0[0]; 860 const int g = src_argb0[1]; 861 const int r = src_argb0[2]; 862 const int a = src_argb0[3]; 863 const int b_sub = src_argb1[0]; 864 const int g_sub = src_argb1[1]; 865 const int r_sub = src_argb1[2]; 866 const int a_sub = src_argb1[3]; 867 dst_argb[0] = SHADE(b, b_sub); 868 dst_argb[1] = SHADE(g, g_sub); 869 dst_argb[2] = SHADE(r, r_sub); 870 dst_argb[3] = SHADE(a, a_sub); 871 src_argb0 += 4; 872 src_argb1 += 4; 873 dst_argb += 4; 874 } 875 } 876 #undef SHADE 877 878 // Sobel functions which mimics SSSE3. 
879 void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2, 880 uint8* dst_sobelx, int width) { 881 int i; 882 for (i = 0; i < width; ++i) { 883 int a = src_y0[i]; 884 int b = src_y1[i]; 885 int c = src_y2[i]; 886 int a_sub = src_y0[i + 2]; 887 int b_sub = src_y1[i + 2]; 888 int c_sub = src_y2[i + 2]; 889 int a_diff = a - a_sub; 890 int b_diff = b - b_sub; 891 int c_diff = c - c_sub; 892 int sobel = Abs(a_diff + b_diff * 2 + c_diff); 893 dst_sobelx[i] = (uint8)(clamp255(sobel)); 894 } 895 } 896 897 void SobelYRow_C(const uint8* src_y0, const uint8* src_y1, 898 uint8* dst_sobely, int width) { 899 int i; 900 for (i = 0; i < width; ++i) { 901 int a = src_y0[i + 0]; 902 int b = src_y0[i + 1]; 903 int c = src_y0[i + 2]; 904 int a_sub = src_y1[i + 0]; 905 int b_sub = src_y1[i + 1]; 906 int c_sub = src_y1[i + 2]; 907 int a_diff = a - a_sub; 908 int b_diff = b - b_sub; 909 int c_diff = c - c_sub; 910 int sobel = Abs(a_diff + b_diff * 2 + c_diff); 911 dst_sobely[i] = (uint8)(clamp255(sobel)); 912 } 913 } 914 915 void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely, 916 uint8* dst_argb, int width) { 917 int i; 918 for (i = 0; i < width; ++i) { 919 int r = src_sobelx[i]; 920 int b = src_sobely[i]; 921 int s = clamp255(r + b); 922 dst_argb[0] = (uint8)(s); 923 dst_argb[1] = (uint8)(s); 924 dst_argb[2] = (uint8)(s); 925 dst_argb[3] = (uint8)(255u); 926 dst_argb += 4; 927 } 928 } 929 930 void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely, 931 uint8* dst_y, int width) { 932 int i; 933 for (i = 0; i < width; ++i) { 934 int r = src_sobelx[i]; 935 int b = src_sobely[i]; 936 int s = clamp255(r + b); 937 dst_y[i] = (uint8)(s); 938 } 939 } 940 941 void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely, 942 uint8* dst_argb, int width) { 943 int i; 944 for (i = 0; i < width; ++i) { 945 int r = src_sobelx[i]; 946 int b = src_sobely[i]; 947 int g = clamp255(r + b); 948 dst_argb[0] = (uint8)(b); 949 dst_argb[1] = 
(uint8)(g); 950 dst_argb[2] = (uint8)(r); 951 dst_argb[3] = (uint8)(255u); 952 dst_argb += 4; 953 } 954 } 955 956 void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) { 957 // Copy a Y to RGB. 958 int x; 959 for (x = 0; x < width; ++x) { 960 uint8 y = src_y[0]; 961 dst_argb[2] = dst_argb[1] = dst_argb[0] = y; 962 dst_argb[3] = 255u; 963 dst_argb += 4; 964 ++src_y; 965 } 966 } 967 968 // TODO(fbarchard): Unify these structures to be platform independent. 969 // TODO(fbarchard): Generate SIMD structures from float matrix. 970 971 // BT.601 YUV to RGB reference 972 // R = (Y - 16) * 1.164 - V * -1.596 973 // G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813 974 // B = (Y - 16) * 1.164 - U * -2.018 975 976 // Y contribution to R,G,B. Scale and bias. 977 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ 978 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ 979 980 // U and V contributions to R,G,B. 981 #define UB -128 /* max(-128, round(-2.018 * 64)) */ 982 #define UG 25 /* round(0.391 * 64) */ 983 #define VG 52 /* round(0.813 * 64) */ 984 #define VR -102 /* round(-1.596 * 64) */ 985 986 // Bias values to subtract 16 from Y and 128 from U and V. 
// Per-channel bias: each U/V coefficient multiplied by 128, plus the Y bias
// YGB, folded into one constant so YuvPixel applies it with one addition.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

// The BT.601 coefficients are laid out three different ways, selected by
// target architecture; YuvPixel unpacks them with matching #if branches.
// kYvuI601Constants holds the same values as kYuvI601Constants with the
// U and V coefficients (and the B and R biases) exchanged.
#if defined(__aarch64__)
// AArch64 layout: U and V coefficients interleaved.  The B/R coefficients
// are stored negated (YuvPixel negates them again on load) and the Y scale
// is stored multiplied by 0x0101 (YuvPixel divides it back out).
const YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
  { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
  { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
  { UG, VG, UG, VG, UG, VG, UG, VG },
  { UG, VG, UG, VG, UG, VG, UG, VG },
  { BB, BG, BR, 0, 0, 0, 0, 0 },
  { 0x0101 * YG, 0, 0, 0 }
};
const YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
  { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
  { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
  { VG, UG, VG, UG, VG, UG, VG, UG },
  { VG, UG, VG, UG, VG, UG, VG, UG },
  { BR, BG, BB, 0, 0, 0, 0, 0 },
  { 0x0101 * YG, 0, 0, 0 }
};
#elif defined(__arm__)
// 32-bit ARM layout: four copies of the first coefficient, four copies of
// the second, then zero padding.  YuvPixel reads elements [0] and [4].
const YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
  { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 },
  { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 },
  { BB, BG, BR, 0, 0, 0, 0, 0 },
  { 0x0101 * YG, 0, 0, 0 }
};
const YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
  { -VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0 },
  { VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0 },
  { BR, BG, BB, 0, 0, 0, 0, 0 },
  { 0x0101 * YG, 0, 0, 0 }
};
#else
// Default layout: one row of interleaved U, V weights per output channel
// (B, G, R), then one row per channel bias, then the Y scale.  Coefficients
// are stored with their original sign.
const YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
  { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
    UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
  { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
    UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
  { 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
    0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR },
  { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
  { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
  { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
  { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
};
const YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
  { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
    VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
  { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
    VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
  { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
    0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
  { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
  { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
  { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
  { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
};
#endif

// The coefficient macros are redefined for the next colour matrix below.
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG

// JPEG YUV to RGB reference
// *  R = Y                - V * -1.40200
// *  G = Y - U *  0.34414 - V *  0.71414
// *  B = Y - U * -1.77200

// Y contribution to R,G,B.  Scale and bias.
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
#define YGB 32 /* 64 / 2 */

// U and V contributions to R,G,B.
#define UB -113 /* round(-1.77200 * 64) */
#define UG 22 /* round(0.34414 * 64) */
#define VG 46 /* round(0.71414 * 64) */
#define VR -90 /* round(-1.40200 * 64) */

// Bias values to round, and subtract 128 from U and V.
// Per-channel bias: each U/V coefficient multiplied by 128, plus the
// rounding term YGB, folded into one constant per output channel.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

// The JPEG coefficients are laid out three different ways, selected by
// target architecture; YuvPixel unpacks them with matching #if branches.
// kYvuJPEGConstants holds the same values as kYuvJPEGConstants with the
// U and V coefficients (and the B and R biases) exchanged.
#if defined(__aarch64__)
// AArch64 layout: U and V coefficients interleaved.  The B/R coefficients
// are stored negated (YuvPixel negates them again on load) and the Y scale
// is stored multiplied by 0x0101 (YuvPixel divides it back out).
const YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
  { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
  { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
  { UG, VG, UG, VG, UG, VG, UG, VG },
  { UG, VG, UG, VG, UG, VG, UG, VG },
  { BB, BG, BR, 0, 0, 0, 0, 0 },
  { 0x0101 * YG, 0, 0, 0 }
};
const YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
  { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
  { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
  { VG, UG, VG, UG, VG, UG, VG, UG },
  { VG, UG, VG, UG, VG, UG, VG, UG },
  { BR, BG, BB, 0, 0, 0, 0, 0 },
  { 0x0101 * YG, 0, 0, 0 }
};
#elif defined(__arm__)
// 32-bit ARM layout: four copies of the first coefficient, four copies of
// the second, then zero padding.  YuvPixel reads elements [0] and [4].
const YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
  { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 },
  { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 },
  { BB, BG, BR, 0, 0, 0, 0, 0 },
  { 0x0101 * YG, 0, 0, 0 }
};
const YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
  { -VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0 },
  { VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0 },
  { BR, BG, BB, 0, 0, 0, 0, 0 },
  { 0x0101 * YG, 0, 0, 0 }
};
#else
// Default layout: one row of interleaved U, V weights per output channel
// (B, G, R), then one row per channel bias, then the Y scale.  Coefficients
// are stored with their original sign.
const YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
  { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
    UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
  { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
    UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
  { 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
    0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR },
  { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
  { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
  { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
  { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
};
const YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
  { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
    VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
  { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
    VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
  { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
    0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
  { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
  { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
  { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
  { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
};
#endif

// The coefficient macros are redefined for the next colour matrix below.
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG

// BT.709 YUV to RGB reference
// *  R = Y                - V * -1.28033
// *  G = Y - U *  0.21482 - V *  0.38059
// *  B = Y - U * -2.12798

// Y contribution to R,G,B.  Scale and bias.
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
#define YGB 32 /* 64 / 2 */

// TODO(fbarchard): Find way to express 2.12 instead of 2.0.
// U and V contributions to R,G,B.
#define UB -128 /* max(-128, round(-2.12798 * 64)) */
#define UG 14 /* round(0.21482 * 64) */
#define VG 24 /* round(0.38059 * 64) */
#define VR -82 /* round(-1.28033 * 64) */

// Bias values to round, and subtract 128 from U and V.
// Per-channel bias: each U/V coefficient multiplied by 128, plus the
// rounding term YGB, folded into one constant per output channel.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

// The BT.709 coefficients are laid out three different ways, selected by
// target architecture; YuvPixel unpacks them with matching #if branches.
// kYvuH709Constants holds the same values as kYuvH709Constants with the
// U and V coefficients (and the B and R biases) exchanged.
#if defined(__aarch64__)
// AArch64 layout: U and V coefficients interleaved.  The B/R coefficients
// are stored negated (YuvPixel negates them again on load) and the Y scale
// is stored multiplied by 0x0101 (YuvPixel divides it back out).
const YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
  { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
  { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
  { UG, VG, UG, VG, UG, VG, UG, VG },
  { UG, VG, UG, VG, UG, VG, UG, VG },
  { BB, BG, BR, 0, 0, 0, 0, 0 },
  { 0x0101 * YG, 0, 0, 0 }
};
const YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
  { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
  { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
  { VG, UG, VG, UG, VG, UG, VG, UG },
  { VG, UG, VG, UG, VG, UG, VG, UG },
  { BR, BG, BB, 0, 0, 0, 0, 0 },
  { 0x0101 * YG, 0, 0, 0 }
};
#elif defined(__arm__)
// 32-bit ARM layout: four copies of the first coefficient, four copies of
// the second, then zero padding.  YuvPixel reads elements [0] and [4].
const YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
  { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 },
  { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 },
  { BB, BG, BR, 0, 0, 0, 0, 0 },
  { 0x0101 * YG, 0, 0, 0 }
};
const YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
  { -VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0 },
  { VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0 },
  { BR, BG, BB, 0, 0, 0, 0, 0 },
  { 0x0101 * YG, 0, 0, 0 }
};
#else
// Default layout: one row of interleaved U, V weights per output channel
// (B, G, R), then one row per channel bias, then the Y scale.  Coefficients
// are stored with their original sign.
const YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
  { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
    UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
  { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
    UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
  { 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
    0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR },
  { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
  { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
  { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
  { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
};
const YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
  { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
    VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
  { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
    VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
  { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
    0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
  { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
  { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
  { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
  { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
};
#endif

// Release the coefficient macros; YG and YGB are defined again further down.
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG

// C reference code that mimics the YUV assembly.
1234 static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, 1235 uint8* b, uint8* g, uint8* r, 1236 const struct YuvConstants* yuvconstants) { 1237 #if defined(__aarch64__) 1238 int ub = -yuvconstants->kUVToRB[0]; 1239 int ug = yuvconstants->kUVToG[0]; 1240 int vg = yuvconstants->kUVToG[1]; 1241 int vr = -yuvconstants->kUVToRB[1]; 1242 int bb = yuvconstants->kUVBiasBGR[0]; 1243 int bg = yuvconstants->kUVBiasBGR[1]; 1244 int br = yuvconstants->kUVBiasBGR[2]; 1245 int yg = yuvconstants->kYToRgb[0] / 0x0101; 1246 #elif defined(__arm__) 1247 int ub = -yuvconstants->kUVToRB[0]; 1248 int ug = yuvconstants->kUVToG[0]; 1249 int vg = yuvconstants->kUVToG[4]; 1250 int vr = -yuvconstants->kUVToRB[4]; 1251 int bb = yuvconstants->kUVBiasBGR[0]; 1252 int bg = yuvconstants->kUVBiasBGR[1]; 1253 int br = yuvconstants->kUVBiasBGR[2]; 1254 int yg = yuvconstants->kYToRgb[0] / 0x0101; 1255 #else 1256 int ub = yuvconstants->kUVToB[0]; 1257 int ug = yuvconstants->kUVToG[0]; 1258 int vg = yuvconstants->kUVToG[1]; 1259 int vr = yuvconstants->kUVToR[1]; 1260 int bb = yuvconstants->kUVBiasB[0]; 1261 int bg = yuvconstants->kUVBiasG[0]; 1262 int br = yuvconstants->kUVBiasR[0]; 1263 int yg = yuvconstants->kYToRgb[0]; 1264 #endif 1265 1266 uint32 y1 = (uint32)(y * 0x0101 * yg) >> 16; 1267 *b = Clamp((int32)(-(u * ub ) + y1 + bb) >> 6); 1268 *g = Clamp((int32)(-(u * ug + v * vg) + y1 + bg) >> 6); 1269 *r = Clamp((int32)(-( v * vr) + y1 + br) >> 6); 1270 } 1271 1272 // Y contribution to R,G,B. Scale and bias. 1273 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ 1274 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ 1275 1276 // C reference code that mimics the YUV assembly. 
1277 static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) { 1278 uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16; 1279 *b = Clamp((int32)(y1 + YGB) >> 6); 1280 *g = Clamp((int32)(y1 + YGB) >> 6); 1281 *r = Clamp((int32)(y1 + YGB) >> 6); 1282 } 1283 1284 #undef YG 1285 #undef YGB 1286 1287 #if !defined(LIBYUV_DISABLE_NEON) && \ 1288 (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON)) 1289 // C mimic assembly. 1290 // TODO(fbarchard): Remove subsampling from Neon. 1291 void I444ToARGBRow_C(const uint8* src_y, 1292 const uint8* src_u, 1293 const uint8* src_v, 1294 uint8* rgb_buf, 1295 const struct YuvConstants* yuvconstants, 1296 int width) { 1297 int x; 1298 for (x = 0; x < width - 1; x += 2) { 1299 uint8 u = (src_u[0] + src_u[1] + 1) >> 1; 1300 uint8 v = (src_v[0] + src_v[1] + 1) >> 1; 1301 YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, 1302 yuvconstants); 1303 rgb_buf[3] = 255; 1304 YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, 1305 yuvconstants); 1306 rgb_buf[7] = 255; 1307 src_y += 2; 1308 src_u += 2; 1309 src_v += 2; 1310 rgb_buf += 8; // Advance 2 pixels. 1311 } 1312 if (width & 1) { 1313 YuvPixel(src_y[0], src_u[0], src_v[0], 1314 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1315 rgb_buf[3] = 255; 1316 } 1317 } 1318 #else 1319 void I444ToARGBRow_C(const uint8* src_y, 1320 const uint8* src_u, 1321 const uint8* src_v, 1322 uint8* rgb_buf, 1323 const struct YuvConstants* yuvconstants, 1324 int width) { 1325 int x; 1326 for (x = 0; x < width; ++x) { 1327 YuvPixel(src_y[0], src_u[0], src_v[0], 1328 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1329 rgb_buf[3] = 255; 1330 src_y += 1; 1331 src_u += 1; 1332 src_v += 1; 1333 rgb_buf += 4; // Advance 1 pixel. 
1334 } 1335 } 1336 #endif 1337 1338 // Also used for 420 1339 void I422ToARGBRow_C(const uint8* src_y, 1340 const uint8* src_u, 1341 const uint8* src_v, 1342 uint8* rgb_buf, 1343 const struct YuvConstants* yuvconstants, 1344 int width) { 1345 int x; 1346 for (x = 0; x < width - 1; x += 2) { 1347 YuvPixel(src_y[0], src_u[0], src_v[0], 1348 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1349 rgb_buf[3] = 255; 1350 YuvPixel(src_y[1], src_u[0], src_v[0], 1351 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); 1352 rgb_buf[7] = 255; 1353 src_y += 2; 1354 src_u += 1; 1355 src_v += 1; 1356 rgb_buf += 8; // Advance 2 pixels. 1357 } 1358 if (width & 1) { 1359 YuvPixel(src_y[0], src_u[0], src_v[0], 1360 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1361 rgb_buf[3] = 255; 1362 } 1363 } 1364 1365 void I422AlphaToARGBRow_C(const uint8* src_y, 1366 const uint8* src_u, 1367 const uint8* src_v, 1368 const uint8* src_a, 1369 uint8* rgb_buf, 1370 const struct YuvConstants* yuvconstants, 1371 int width) { 1372 int x; 1373 for (x = 0; x < width - 1; x += 2) { 1374 YuvPixel(src_y[0], src_u[0], src_v[0], 1375 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1376 rgb_buf[3] = src_a[0]; 1377 YuvPixel(src_y[1], src_u[0], src_v[0], 1378 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); 1379 rgb_buf[7] = src_a[1]; 1380 src_y += 2; 1381 src_u += 1; 1382 src_v += 1; 1383 src_a += 2; 1384 rgb_buf += 8; // Advance 2 pixels. 
1385 } 1386 if (width & 1) { 1387 YuvPixel(src_y[0], src_u[0], src_v[0], 1388 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1389 rgb_buf[3] = src_a[0]; 1390 } 1391 } 1392 1393 void I422ToRGB24Row_C(const uint8* src_y, 1394 const uint8* src_u, 1395 const uint8* src_v, 1396 uint8* rgb_buf, 1397 const struct YuvConstants* yuvconstants, 1398 int width) { 1399 int x; 1400 for (x = 0; x < width - 1; x += 2) { 1401 YuvPixel(src_y[0], src_u[0], src_v[0], 1402 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1403 YuvPixel(src_y[1], src_u[0], src_v[0], 1404 rgb_buf + 3, rgb_buf + 4, rgb_buf + 5, yuvconstants); 1405 src_y += 2; 1406 src_u += 1; 1407 src_v += 1; 1408 rgb_buf += 6; // Advance 2 pixels. 1409 } 1410 if (width & 1) { 1411 YuvPixel(src_y[0], src_u[0], src_v[0], 1412 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1413 } 1414 } 1415 1416 void I422ToARGB4444Row_C(const uint8* src_y, 1417 const uint8* src_u, 1418 const uint8* src_v, 1419 uint8* dst_argb4444, 1420 const struct YuvConstants* yuvconstants, 1421 int width) { 1422 uint8 b0; 1423 uint8 g0; 1424 uint8 r0; 1425 uint8 b1; 1426 uint8 g1; 1427 uint8 r1; 1428 int x; 1429 for (x = 0; x < width - 1; x += 2) { 1430 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); 1431 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); 1432 b0 = b0 >> 4; 1433 g0 = g0 >> 4; 1434 r0 = r0 >> 4; 1435 b1 = b1 >> 4; 1436 g1 = g1 >> 4; 1437 r1 = r1 >> 4; 1438 *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 1439 (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000; 1440 src_y += 2; 1441 src_u += 1; 1442 src_v += 1; 1443 dst_argb4444 += 4; // Advance 2 pixels. 
1444 } 1445 if (width & 1) { 1446 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); 1447 b0 = b0 >> 4; 1448 g0 = g0 >> 4; 1449 r0 = r0 >> 4; 1450 *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 1451 0xf000; 1452 } 1453 } 1454 1455 void I422ToARGB1555Row_C(const uint8* src_y, 1456 const uint8* src_u, 1457 const uint8* src_v, 1458 uint8* dst_argb1555, 1459 const struct YuvConstants* yuvconstants, 1460 int width) { 1461 uint8 b0; 1462 uint8 g0; 1463 uint8 r0; 1464 uint8 b1; 1465 uint8 g1; 1466 uint8 r1; 1467 int x; 1468 for (x = 0; x < width - 1; x += 2) { 1469 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); 1470 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); 1471 b0 = b0 >> 3; 1472 g0 = g0 >> 3; 1473 r0 = r0 >> 3; 1474 b1 = b1 >> 3; 1475 g1 = g1 >> 3; 1476 r1 = r1 >> 3; 1477 *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 1478 (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000; 1479 src_y += 2; 1480 src_u += 1; 1481 src_v += 1; 1482 dst_argb1555 += 4; // Advance 2 pixels. 
1483 } 1484 if (width & 1) { 1485 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); 1486 b0 = b0 >> 3; 1487 g0 = g0 >> 3; 1488 r0 = r0 >> 3; 1489 *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 1490 0x8000; 1491 } 1492 } 1493 1494 void I422ToRGB565Row_C(const uint8* src_y, 1495 const uint8* src_u, 1496 const uint8* src_v, 1497 uint8* dst_rgb565, 1498 const struct YuvConstants* yuvconstants, 1499 int width) { 1500 uint8 b0; 1501 uint8 g0; 1502 uint8 r0; 1503 uint8 b1; 1504 uint8 g1; 1505 uint8 r1; 1506 int x; 1507 for (x = 0; x < width - 1; x += 2) { 1508 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); 1509 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); 1510 b0 = b0 >> 3; 1511 g0 = g0 >> 2; 1512 r0 = r0 >> 3; 1513 b1 = b1 >> 3; 1514 g1 = g1 >> 2; 1515 r1 = r1 >> 3; 1516 *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | 1517 (b1 << 16) | (g1 << 21) | (r1 << 27); 1518 src_y += 2; 1519 src_u += 1; 1520 src_v += 1; 1521 dst_rgb565 += 4; // Advance 2 pixels. 
1522 } 1523 if (width & 1) { 1524 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); 1525 b0 = b0 >> 3; 1526 g0 = g0 >> 2; 1527 r0 = r0 >> 3; 1528 *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); 1529 } 1530 } 1531 1532 void I411ToARGBRow_C(const uint8* src_y, 1533 const uint8* src_u, 1534 const uint8* src_v, 1535 uint8* rgb_buf, 1536 const struct YuvConstants* yuvconstants, 1537 int width) { 1538 int x; 1539 for (x = 0; x < width - 3; x += 4) { 1540 YuvPixel(src_y[0], src_u[0], src_v[0], 1541 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1542 rgb_buf[3] = 255; 1543 YuvPixel(src_y[1], src_u[0], src_v[0], 1544 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); 1545 rgb_buf[7] = 255; 1546 YuvPixel(src_y[2], src_u[0], src_v[0], 1547 rgb_buf + 8, rgb_buf + 9, rgb_buf + 10, yuvconstants); 1548 rgb_buf[11] = 255; 1549 YuvPixel(src_y[3], src_u[0], src_v[0], 1550 rgb_buf + 12, rgb_buf + 13, rgb_buf + 14, yuvconstants); 1551 rgb_buf[15] = 255; 1552 src_y += 4; 1553 src_u += 1; 1554 src_v += 1; 1555 rgb_buf += 16; // Advance 4 pixels. 1556 } 1557 if (width & 2) { 1558 YuvPixel(src_y[0], src_u[0], src_v[0], 1559 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1560 rgb_buf[3] = 255; 1561 YuvPixel(src_y[1], src_u[0], src_v[0], 1562 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); 1563 rgb_buf[7] = 255; 1564 src_y += 2; 1565 rgb_buf += 8; // Advance 2 pixels. 
1566 } 1567 if (width & 1) { 1568 YuvPixel(src_y[0], src_u[0], src_v[0], 1569 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1570 rgb_buf[3] = 255; 1571 } 1572 } 1573 1574 void NV12ToARGBRow_C(const uint8* src_y, 1575 const uint8* src_uv, 1576 uint8* rgb_buf, 1577 const struct YuvConstants* yuvconstants, 1578 int width) { 1579 int x; 1580 for (x = 0; x < width - 1; x += 2) { 1581 YuvPixel(src_y[0], src_uv[0], src_uv[1], 1582 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1583 rgb_buf[3] = 255; 1584 YuvPixel(src_y[1], src_uv[0], src_uv[1], 1585 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); 1586 rgb_buf[7] = 255; 1587 src_y += 2; 1588 src_uv += 2; 1589 rgb_buf += 8; // Advance 2 pixels. 1590 } 1591 if (width & 1) { 1592 YuvPixel(src_y[0], src_uv[0], src_uv[1], 1593 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1594 rgb_buf[3] = 255; 1595 } 1596 } 1597 1598 void NV21ToARGBRow_C(const uint8* src_y, 1599 const uint8* src_vu, 1600 uint8* rgb_buf, 1601 const struct YuvConstants* yuvconstants, 1602 int width) { 1603 int x; 1604 for (x = 0; x < width - 1; x += 2) { 1605 YuvPixel(src_y[0], src_vu[1], src_vu[0], 1606 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1607 rgb_buf[3] = 255; 1608 YuvPixel(src_y[1], src_vu[1], src_vu[0], 1609 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); 1610 rgb_buf[7] = 255; 1611 src_y += 2; 1612 src_vu += 2; 1613 rgb_buf += 8; // Advance 2 pixels. 
1614 } 1615 if (width & 1) { 1616 YuvPixel(src_y[0], src_vu[1], src_vu[0], 1617 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1618 rgb_buf[3] = 255; 1619 } 1620 } 1621 1622 void NV12ToRGB565Row_C(const uint8* src_y, 1623 const uint8* src_uv, 1624 uint8* dst_rgb565, 1625 const struct YuvConstants* yuvconstants, 1626 int width) { 1627 uint8 b0; 1628 uint8 g0; 1629 uint8 r0; 1630 uint8 b1; 1631 uint8 g1; 1632 uint8 r1; 1633 int x; 1634 for (x = 0; x < width - 1; x += 2) { 1635 YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants); 1636 YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants); 1637 b0 = b0 >> 3; 1638 g0 = g0 >> 2; 1639 r0 = r0 >> 3; 1640 b1 = b1 >> 3; 1641 g1 = g1 >> 2; 1642 r1 = r1 >> 3; 1643 *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | 1644 (b1 << 16) | (g1 << 21) | (r1 << 27); 1645 src_y += 2; 1646 src_uv += 2; 1647 dst_rgb565 += 4; // Advance 2 pixels. 1648 } 1649 if (width & 1) { 1650 YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants); 1651 b0 = b0 >> 3; 1652 g0 = g0 >> 2; 1653 r0 = r0 >> 3; 1654 *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); 1655 } 1656 } 1657 1658 void YUY2ToARGBRow_C(const uint8* src_yuy2, 1659 uint8* rgb_buf, 1660 const struct YuvConstants* yuvconstants, 1661 int width) { 1662 int x; 1663 for (x = 0; x < width - 1; x += 2) { 1664 YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], 1665 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1666 rgb_buf[3] = 255; 1667 YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], 1668 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); 1669 rgb_buf[7] = 255; 1670 src_yuy2 += 4; 1671 rgb_buf += 8; // Advance 2 pixels. 
1672 } 1673 if (width & 1) { 1674 YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], 1675 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1676 rgb_buf[3] = 255; 1677 } 1678 } 1679 1680 void UYVYToARGBRow_C(const uint8* src_uyvy, 1681 uint8* rgb_buf, 1682 const struct YuvConstants* yuvconstants, 1683 int width) { 1684 int x; 1685 for (x = 0; x < width - 1; x += 2) { 1686 YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], 1687 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1688 rgb_buf[3] = 255; 1689 YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], 1690 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); 1691 rgb_buf[7] = 255; 1692 src_uyvy += 4; 1693 rgb_buf += 8; // Advance 2 pixels. 1694 } 1695 if (width & 1) { 1696 YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], 1697 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); 1698 rgb_buf[3] = 255; 1699 } 1700 } 1701 1702 void I422ToRGBARow_C(const uint8* src_y, 1703 const uint8* src_u, 1704 const uint8* src_v, 1705 uint8* rgb_buf, 1706 const struct YuvConstants* yuvconstants, 1707 int width) { 1708 int x; 1709 for (x = 0; x < width - 1; x += 2) { 1710 YuvPixel(src_y[0], src_u[0], src_v[0], 1711 rgb_buf + 1, rgb_buf + 2, rgb_buf + 3, yuvconstants); 1712 rgb_buf[0] = 255; 1713 YuvPixel(src_y[1], src_u[0], src_v[0], 1714 rgb_buf + 5, rgb_buf + 6, rgb_buf + 7, yuvconstants); 1715 rgb_buf[4] = 255; 1716 src_y += 2; 1717 src_u += 1; 1718 src_v += 1; 1719 rgb_buf += 8; // Advance 2 pixels. 1720 } 1721 if (width & 1) { 1722 YuvPixel(src_y[0], src_u[0], src_v[0], 1723 rgb_buf + 1, rgb_buf + 2, rgb_buf + 3, yuvconstants); 1724 rgb_buf[0] = 255; 1725 } 1726 } 1727 1728 void I400ToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) { 1729 int x; 1730 for (x = 0; x < width - 1; x += 2) { 1731 YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1732 rgb_buf[3] = 255; 1733 YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1734 rgb_buf[7] = 255; 1735 src_y += 2; 1736 rgb_buf += 8; // Advance 2 pixels. 
1737 } 1738 if (width & 1) { 1739 YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1740 rgb_buf[3] = 255; 1741 } 1742 } 1743 1744 void MirrorRow_C(const uint8* src, uint8* dst, int width) { 1745 int x; 1746 src += width - 1; 1747 for (x = 0; x < width - 1; x += 2) { 1748 dst[x] = src[0]; 1749 dst[x + 1] = src[-1]; 1750 src -= 2; 1751 } 1752 if (width & 1) { 1753 dst[width - 1] = src[0]; 1754 } 1755 } 1756 1757 void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { 1758 int x; 1759 src_uv += (width - 1) << 1; 1760 for (x = 0; x < width - 1; x += 2) { 1761 dst_u[x] = src_uv[0]; 1762 dst_u[x + 1] = src_uv[-2]; 1763 dst_v[x] = src_uv[1]; 1764 dst_v[x + 1] = src_uv[-2 + 1]; 1765 src_uv -= 4; 1766 } 1767 if (width & 1) { 1768 dst_u[width - 1] = src_uv[0]; 1769 dst_v[width - 1] = src_uv[1]; 1770 } 1771 } 1772 1773 void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) { 1774 int x; 1775 const uint32* src32 = (const uint32*)(src); 1776 uint32* dst32 = (uint32*)(dst); 1777 src32 += width - 1; 1778 for (x = 0; x < width - 1; x += 2) { 1779 dst32[x] = src32[0]; 1780 dst32[x + 1] = src32[-1]; 1781 src32 -= 2; 1782 } 1783 if (width & 1) { 1784 dst32[width - 1] = src32[0]; 1785 } 1786 } 1787 1788 void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { 1789 int x; 1790 for (x = 0; x < width - 1; x += 2) { 1791 dst_u[x] = src_uv[0]; 1792 dst_u[x + 1] = src_uv[2]; 1793 dst_v[x] = src_uv[1]; 1794 dst_v[x + 1] = src_uv[3]; 1795 src_uv += 4; 1796 } 1797 if (width & 1) { 1798 dst_u[width - 1] = src_uv[0]; 1799 dst_v[width - 1] = src_uv[1]; 1800 } 1801 } 1802 1803 void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv, 1804 int width) { 1805 int x; 1806 for (x = 0; x < width - 1; x += 2) { 1807 dst_uv[0] = src_u[x]; 1808 dst_uv[1] = src_v[x]; 1809 dst_uv[2] = src_u[x + 1]; 1810 dst_uv[3] = src_v[x + 1]; 1811 dst_uv += 4; 1812 } 1813 if (width & 1) { 1814 dst_uv[0] = src_u[width - 1]; 1815 dst_uv[1] = 
src_v[width - 1]; 1816 } 1817 } 1818 1819 void CopyRow_C(const uint8* src, uint8* dst, int count) { 1820 memcpy(dst, src, count); 1821 } 1822 1823 void CopyRow_16_C(const uint16* src, uint16* dst, int count) { 1824 memcpy(dst, src, count * 2); 1825 } 1826 1827 void SetRow_C(uint8* dst, uint8 v8, int width) { 1828 memset(dst, v8, width); 1829 } 1830 1831 void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int width) { 1832 uint32* d = (uint32*)(dst_argb); 1833 int x; 1834 for (x = 0; x < width; ++x) { 1835 d[x] = v32; 1836 } 1837 } 1838 1839 // Filter 2 rows of YUY2 UV's (422) into U and V (420). 1840 void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2, 1841 uint8* dst_u, uint8* dst_v, int width) { 1842 // Output a row of UV values, filtering 2 rows of YUY2. 1843 int x; 1844 for (x = 0; x < width; x += 2) { 1845 dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1; 1846 dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1; 1847 src_yuy2 += 4; 1848 dst_u += 1; 1849 dst_v += 1; 1850 } 1851 } 1852 1853 // Copy row of YUY2 UV's (422) into U and V (422). 1854 void YUY2ToUV422Row_C(const uint8* src_yuy2, 1855 uint8* dst_u, uint8* dst_v, int width) { 1856 // Output a row of UV values. 1857 int x; 1858 for (x = 0; x < width; x += 2) { 1859 dst_u[0] = src_yuy2[1]; 1860 dst_v[0] = src_yuy2[3]; 1861 src_yuy2 += 4; 1862 dst_u += 1; 1863 dst_v += 1; 1864 } 1865 } 1866 1867 // Copy row of YUY2 Y's (422) into Y (420/422). 1868 void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) { 1869 // Output a row of Y values. 1870 int x; 1871 for (x = 0; x < width - 1; x += 2) { 1872 dst_y[x] = src_yuy2[0]; 1873 dst_y[x + 1] = src_yuy2[2]; 1874 src_yuy2 += 4; 1875 } 1876 if (width & 1) { 1877 dst_y[width - 1] = src_yuy2[0]; 1878 } 1879 } 1880 1881 // Filter 2 rows of UYVY UV's (422) into U and V (420). 1882 void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy, 1883 uint8* dst_u, uint8* dst_v, int width) { 1884 // Output a row of UV values. 
1885 int x; 1886 for (x = 0; x < width; x += 2) { 1887 dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1; 1888 dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1; 1889 src_uyvy += 4; 1890 dst_u += 1; 1891 dst_v += 1; 1892 } 1893 } 1894 1895 // Copy row of UYVY UV's (422) into U and V (422). 1896 void UYVYToUV422Row_C(const uint8* src_uyvy, 1897 uint8* dst_u, uint8* dst_v, int width) { 1898 // Output a row of UV values. 1899 int x; 1900 for (x = 0; x < width; x += 2) { 1901 dst_u[0] = src_uyvy[0]; 1902 dst_v[0] = src_uyvy[2]; 1903 src_uyvy += 4; 1904 dst_u += 1; 1905 dst_v += 1; 1906 } 1907 } 1908 1909 // Copy row of UYVY Y's (422) into Y (420/422). 1910 void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) { 1911 // Output a row of Y values. 1912 int x; 1913 for (x = 0; x < width - 1; x += 2) { 1914 dst_y[x] = src_uyvy[1]; 1915 dst_y[x + 1] = src_uyvy[3]; 1916 src_uyvy += 4; 1917 } 1918 if (width & 1) { 1919 dst_y[width - 1] = src_uyvy[1]; 1920 } 1921 } 1922 1923 #define BLEND(f, b, a) (((256 - a) * b) >> 8) + f 1924 1925 // Blend src_argb0 over src_argb1 and store to dst_argb. 1926 // dst_argb may be src_argb0 or src_argb1. 1927 // This code mimics the SSSE3 version for better testability. 
1928 void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1, 1929 uint8* dst_argb, int width) { 1930 int x; 1931 for (x = 0; x < width - 1; x += 2) { 1932 uint32 fb = src_argb0[0]; 1933 uint32 fg = src_argb0[1]; 1934 uint32 fr = src_argb0[2]; 1935 uint32 a = src_argb0[3]; 1936 uint32 bb = src_argb1[0]; 1937 uint32 bg = src_argb1[1]; 1938 uint32 br = src_argb1[2]; 1939 dst_argb[0] = BLEND(fb, bb, a); 1940 dst_argb[1] = BLEND(fg, bg, a); 1941 dst_argb[2] = BLEND(fr, br, a); 1942 dst_argb[3] = 255u; 1943 1944 fb = src_argb0[4 + 0]; 1945 fg = src_argb0[4 + 1]; 1946 fr = src_argb0[4 + 2]; 1947 a = src_argb0[4 + 3]; 1948 bb = src_argb1[4 + 0]; 1949 bg = src_argb1[4 + 1]; 1950 br = src_argb1[4 + 2]; 1951 dst_argb[4 + 0] = BLEND(fb, bb, a); 1952 dst_argb[4 + 1] = BLEND(fg, bg, a); 1953 dst_argb[4 + 2] = BLEND(fr, br, a); 1954 dst_argb[4 + 3] = 255u; 1955 src_argb0 += 8; 1956 src_argb1 += 8; 1957 dst_argb += 8; 1958 } 1959 1960 if (width & 1) { 1961 uint32 fb = src_argb0[0]; 1962 uint32 fg = src_argb0[1]; 1963 uint32 fr = src_argb0[2]; 1964 uint32 a = src_argb0[3]; 1965 uint32 bb = src_argb1[0]; 1966 uint32 bg = src_argb1[1]; 1967 uint32 br = src_argb1[2]; 1968 dst_argb[0] = BLEND(fb, bb, a); 1969 dst_argb[1] = BLEND(fg, bg, a); 1970 dst_argb[2] = BLEND(fr, br, a); 1971 dst_argb[3] = 255u; 1972 } 1973 } 1974 #undef BLEND 1975 1976 #define UBLEND(f, b, a) (((a) * f) + ((255 - a) * b) + 255) >> 8 1977 void BlendPlaneRow_C(const uint8* src0, const uint8* src1, 1978 const uint8* alpha, uint8* dst, int width) { 1979 int x; 1980 for (x = 0; x < width - 1; x += 2) { 1981 dst[0] = UBLEND(src0[0], src1[0], alpha[0]); 1982 dst[1] = UBLEND(src0[1], src1[1], alpha[1]); 1983 src0 += 2; 1984 src1 += 2; 1985 alpha += 2; 1986 dst += 2; 1987 } 1988 if (width & 1) { 1989 dst[0] = UBLEND(src0[0], src1[0], alpha[0]); 1990 } 1991 } 1992 #undef UBLEND 1993 1994 #define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24 1995 1996 // Multiply source RGB by alpha and store to 
destination. 1997 // This code mimics the SSSE3 version for better testability. 1998 void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { 1999 int i; 2000 for (i = 0; i < width - 1; i += 2) { 2001 uint32 b = src_argb[0]; 2002 uint32 g = src_argb[1]; 2003 uint32 r = src_argb[2]; 2004 uint32 a = src_argb[3]; 2005 dst_argb[0] = ATTENUATE(b, a); 2006 dst_argb[1] = ATTENUATE(g, a); 2007 dst_argb[2] = ATTENUATE(r, a); 2008 dst_argb[3] = a; 2009 b = src_argb[4]; 2010 g = src_argb[5]; 2011 r = src_argb[6]; 2012 a = src_argb[7]; 2013 dst_argb[4] = ATTENUATE(b, a); 2014 dst_argb[5] = ATTENUATE(g, a); 2015 dst_argb[6] = ATTENUATE(r, a); 2016 dst_argb[7] = a; 2017 src_argb += 8; 2018 dst_argb += 8; 2019 } 2020 2021 if (width & 1) { 2022 const uint32 b = src_argb[0]; 2023 const uint32 g = src_argb[1]; 2024 const uint32 r = src_argb[2]; 2025 const uint32 a = src_argb[3]; 2026 dst_argb[0] = ATTENUATE(b, a); 2027 dst_argb[1] = ATTENUATE(g, a); 2028 dst_argb[2] = ATTENUATE(r, a); 2029 dst_argb[3] = a; 2030 } 2031 } 2032 #undef ATTENUATE 2033 2034 // Divide source RGB by alpha and store to destination. 2035 // b = (b * 255 + (a / 2)) / a; 2036 // g = (g * 255 + (a / 2)) / a; 2037 // r = (r * 255 + (a / 2)) / a; 2038 // Reciprocal method is off by 1 on some values. ie 125 2039 // 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower. 
2040 #define T(a) 0x01000000 + (0x10000 / a) 2041 const uint32 fixed_invtbl8[256] = { 2042 0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07), 2043 T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f), 2044 T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17), 2045 T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f), 2046 T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), 2047 T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), 2048 T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37), 2049 T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f), 2050 T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47), 2051 T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f), 2052 T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57), 2053 T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), 2054 T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), 2055 T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f), 2056 T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77), 2057 T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f), 2058 T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87), 2059 T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f), 2060 T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), 2061 T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), 2062 T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7), 2063 T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf), 2064 T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7), 2065 T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf), 2066 T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), 
T(0xc6), T(0xc7), 2067 T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), 2068 T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), 2069 T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf), 2070 T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7), 2071 T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef), 2072 T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7), 2073 T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 }; 2074 #undef T 2075 2076 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { 2077 int i; 2078 for (i = 0; i < width; ++i) { 2079 uint32 b = src_argb[0]; 2080 uint32 g = src_argb[1]; 2081 uint32 r = src_argb[2]; 2082 const uint32 a = src_argb[3]; 2083 const uint32 ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point 2084 b = (b * ia) >> 8; 2085 g = (g * ia) >> 8; 2086 r = (r * ia) >> 8; 2087 // Clamping should not be necessary but is free in assembly. 
2088 dst_argb[0] = clamp255(b); 2089 dst_argb[1] = clamp255(g); 2090 dst_argb[2] = clamp255(r); 2091 dst_argb[3] = a; 2092 src_argb += 4; 2093 dst_argb += 4; 2094 } 2095 } 2096 2097 void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum, 2098 const int32* previous_cumsum, int width) { 2099 int32 row_sum[4] = {0, 0, 0, 0}; 2100 int x; 2101 for (x = 0; x < width; ++x) { 2102 row_sum[0] += row[x * 4 + 0]; 2103 row_sum[1] += row[x * 4 + 1]; 2104 row_sum[2] += row[x * 4 + 2]; 2105 row_sum[3] += row[x * 4 + 3]; 2106 cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0]; 2107 cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1]; 2108 cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2]; 2109 cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3]; 2110 } 2111 } 2112 2113 void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl, 2114 int w, int area, uint8* dst, int count) { 2115 float ooa = 1.0f / area; 2116 int i; 2117 for (i = 0; i < count; ++i) { 2118 dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa); 2119 dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa); 2120 dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa); 2121 dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa); 2122 dst += 4; 2123 tl += 4; 2124 bl += 4; 2125 } 2126 } 2127 2128 // Copy pixels from rotated source to destination row with a slope. 2129 LIBYUV_API 2130 void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, 2131 uint8* dst_argb, const float* uv_dudv, int width) { 2132 int i; 2133 // Render a row of pixels from source into a buffer. 2134 float uv[2]; 2135 uv[0] = uv_dudv[0]; 2136 uv[1] = uv_dudv[1]; 2137 for (i = 0; i < width; ++i) { 2138 int x = (int)(uv[0]); 2139 int y = (int)(uv[1]); 2140 *(uint32*)(dst_argb) = 2141 *(const uint32*)(src_argb + y * src_argb_stride + 2142 x * 4); 2143 dst_argb += 4; 2144 uv[0] += uv_dudv[2]; 2145 uv[1] += uv_dudv[3]; 2146 } 2147 } 2148 2149 // Blend 2 rows into 1. 
2150 static void HalfRow_C(const uint8* src_uv, ptrdiff_t src_uv_stride, 2151 uint8* dst_uv, int width) { 2152 int x; 2153 for (x = 0; x < width; ++x) { 2154 dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1; 2155 } 2156 } 2157 2158 static void HalfRow_16_C(const uint16* src_uv, ptrdiff_t src_uv_stride, 2159 uint16* dst_uv, int width) { 2160 int x; 2161 for (x = 0; x < width; ++x) { 2162 dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1; 2163 } 2164 } 2165 2166 // C version 2x2 -> 2x1. 2167 void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, 2168 ptrdiff_t src_stride, 2169 int width, int source_y_fraction) { 2170 int y1_fraction = source_y_fraction ; 2171 int y0_fraction = 256 - y1_fraction; 2172 const uint8* src_ptr1 = src_ptr + src_stride; 2173 int x; 2174 if (y1_fraction == 0) { 2175 memcpy(dst_ptr, src_ptr, width); 2176 return; 2177 } 2178 if (y1_fraction == 128) { 2179 HalfRow_C(src_ptr, src_stride, dst_ptr, width); 2180 return; 2181 } 2182 for (x = 0; x < width - 1; x += 2) { 2183 dst_ptr[0] = 2184 (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8; 2185 dst_ptr[1] = 2186 (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction + 128) >> 8; 2187 src_ptr += 2; 2188 src_ptr1 += 2; 2189 dst_ptr += 2; 2190 } 2191 if (width & 1) { 2192 dst_ptr[0] = 2193 (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8; 2194 } 2195 } 2196 2197 void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr, 2198 ptrdiff_t src_stride, 2199 int width, int source_y_fraction) { 2200 int y1_fraction = source_y_fraction; 2201 int y0_fraction = 256 - y1_fraction; 2202 const uint16* src_ptr1 = src_ptr + src_stride; 2203 int x; 2204 if (source_y_fraction == 0) { 2205 memcpy(dst_ptr, src_ptr, width * 2); 2206 return; 2207 } 2208 if (source_y_fraction == 128) { 2209 HalfRow_16_C(src_ptr, src_stride, dst_ptr, width); 2210 return; 2211 } 2212 for (x = 0; x < width - 1; x += 2) { 2213 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] 
* y1_fraction) >> 8; 2214 dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8; 2215 src_ptr += 2; 2216 src_ptr1 += 2; 2217 dst_ptr += 2; 2218 } 2219 if (width & 1) { 2220 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; 2221 } 2222 } 2223 2224 // Use first 4 shuffler values to reorder ARGB channels. 2225 void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb, 2226 const uint8* shuffler, int width) { 2227 int index0 = shuffler[0]; 2228 int index1 = shuffler[1]; 2229 int index2 = shuffler[2]; 2230 int index3 = shuffler[3]; 2231 // Shuffle a row of ARGB. 2232 int x; 2233 for (x = 0; x < width; ++x) { 2234 // To support in-place conversion. 2235 uint8 b = src_argb[index0]; 2236 uint8 g = src_argb[index1]; 2237 uint8 r = src_argb[index2]; 2238 uint8 a = src_argb[index3]; 2239 dst_argb[0] = b; 2240 dst_argb[1] = g; 2241 dst_argb[2] = r; 2242 dst_argb[3] = a; 2243 src_argb += 4; 2244 dst_argb += 4; 2245 } 2246 } 2247 2248 void I422ToYUY2Row_C(const uint8* src_y, 2249 const uint8* src_u, 2250 const uint8* src_v, 2251 uint8* dst_frame, int width) { 2252 int x; 2253 for (x = 0; x < width - 1; x += 2) { 2254 dst_frame[0] = src_y[0]; 2255 dst_frame[1] = src_u[0]; 2256 dst_frame[2] = src_y[1]; 2257 dst_frame[3] = src_v[0]; 2258 dst_frame += 4; 2259 src_y += 2; 2260 src_u += 1; 2261 src_v += 1; 2262 } 2263 if (width & 1) { 2264 dst_frame[0] = src_y[0]; 2265 dst_frame[1] = src_u[0]; 2266 dst_frame[2] = 0; 2267 dst_frame[3] = src_v[0]; 2268 } 2269 } 2270 2271 void I422ToUYVYRow_C(const uint8* src_y, 2272 const uint8* src_u, 2273 const uint8* src_v, 2274 uint8* dst_frame, int width) { 2275 int x; 2276 for (x = 0; x < width - 1; x += 2) { 2277 dst_frame[0] = src_u[0]; 2278 dst_frame[1] = src_y[0]; 2279 dst_frame[2] = src_v[0]; 2280 dst_frame[3] = src_y[1]; 2281 dst_frame += 4; 2282 src_y += 2; 2283 src_u += 1; 2284 src_v += 1; 2285 } 2286 if (width & 1) { 2287 dst_frame[0] = src_u[0]; 2288 dst_frame[1] = src_y[0]; 2289 dst_frame[2] 
= src_v[0]; 2290 dst_frame[3] = 0; 2291 } 2292 } 2293 2294 2295 void ARGBPolynomialRow_C(const uint8* src_argb, 2296 uint8* dst_argb, 2297 const float* poly, 2298 int width) { 2299 int i; 2300 for (i = 0; i < width; ++i) { 2301 float b = (float)(src_argb[0]); 2302 float g = (float)(src_argb[1]); 2303 float r = (float)(src_argb[2]); 2304 float a = (float)(src_argb[3]); 2305 float b2 = b * b; 2306 float g2 = g * g; 2307 float r2 = r * r; 2308 float a2 = a * a; 2309 float db = poly[0] + poly[4] * b; 2310 float dg = poly[1] + poly[5] * g; 2311 float dr = poly[2] + poly[6] * r; 2312 float da = poly[3] + poly[7] * a; 2313 float b3 = b2 * b; 2314 float g3 = g2 * g; 2315 float r3 = r2 * r; 2316 float a3 = a2 * a; 2317 db += poly[8] * b2; 2318 dg += poly[9] * g2; 2319 dr += poly[10] * r2; 2320 da += poly[11] * a2; 2321 db += poly[12] * b3; 2322 dg += poly[13] * g3; 2323 dr += poly[14] * r3; 2324 da += poly[15] * a3; 2325 2326 dst_argb[0] = Clamp((int32)(db)); 2327 dst_argb[1] = Clamp((int32)(dg)); 2328 dst_argb[2] = Clamp((int32)(dr)); 2329 dst_argb[3] = Clamp((int32)(da)); 2330 src_argb += 4; 2331 dst_argb += 4; 2332 } 2333 } 2334 2335 void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width, 2336 const uint8* luma, uint32 lumacoeff) { 2337 uint32 bc = lumacoeff & 0xff; 2338 uint32 gc = (lumacoeff >> 8) & 0xff; 2339 uint32 rc = (lumacoeff >> 16) & 0xff; 2340 2341 int i; 2342 for (i = 0; i < width - 1; i += 2) { 2343 // Luminance in rows, color values in columns. 
2344 const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc + 2345 src_argb[2] * rc) & 0x7F00u) + luma; 2346 const uint8* luma1; 2347 dst_argb[0] = luma0[src_argb[0]]; 2348 dst_argb[1] = luma0[src_argb[1]]; 2349 dst_argb[2] = luma0[src_argb[2]]; 2350 dst_argb[3] = src_argb[3]; 2351 luma1 = ((src_argb[4] * bc + src_argb[5] * gc + 2352 src_argb[6] * rc) & 0x7F00u) + luma; 2353 dst_argb[4] = luma1[src_argb[4]]; 2354 dst_argb[5] = luma1[src_argb[5]]; 2355 dst_argb[6] = luma1[src_argb[6]]; 2356 dst_argb[7] = src_argb[7]; 2357 src_argb += 8; 2358 dst_argb += 8; 2359 } 2360 if (width & 1) { 2361 // Luminance in rows, color values in columns. 2362 const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc + 2363 src_argb[2] * rc) & 0x7F00u) + luma; 2364 dst_argb[0] = luma0[src_argb[0]]; 2365 dst_argb[1] = luma0[src_argb[1]]; 2366 dst_argb[2] = luma0[src_argb[2]]; 2367 dst_argb[3] = src_argb[3]; 2368 } 2369 } 2370 2371 void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) { 2372 int i; 2373 for (i = 0; i < width - 1; i += 2) { 2374 dst[3] = src[3]; 2375 dst[7] = src[7]; 2376 dst += 8; 2377 src += 8; 2378 } 2379 if (width & 1) { 2380 dst[3] = src[3]; 2381 } 2382 } 2383 2384 void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width) { 2385 int i; 2386 for (i = 0; i < width - 1; i += 2) { 2387 dst_a[0] = src_argb[3]; 2388 dst_a[1] = src_argb[7]; 2389 dst_a += 2; 2390 src_argb += 8; 2391 } 2392 if (width & 1) { 2393 dst_a[0] = src_argb[3]; 2394 } 2395 } 2396 2397 void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) { 2398 int i; 2399 for (i = 0; i < width - 1; i += 2) { 2400 dst[3] = src[0]; 2401 dst[7] = src[1]; 2402 dst += 8; 2403 src += 2; 2404 } 2405 if (width & 1) { 2406 dst[3] = src[0]; 2407 } 2408 } 2409 2410 // Maximum temporary width for wrappers to process at a time, in pixels. 
#define MAXTWIDTH 2048

// Every wrapper below converts in two steps: the source row is first
// converted to ARGB in a stack buffer of MAXTWIDTH pixels, then that buffer
// is converted to the packed destination format. Rows wider than MAXTWIDTH
// are processed in successive tiles.

#if !(defined(_MSC_VER) && defined(_M_IX86)) && \
    defined(HAS_I422TORGB565ROW_SSSE3)
// row_win.cc has asm version, but GCC uses 2 step wrapper.
void I422ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_u,
                           const uint8* src_v,
                           uint8* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Intermediate ARGB pixels for one tile.
  SIMD_ALIGNED(uint8 argb_tile[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    int tile = remaining;
    if (tile > MAXTWIDTH) {
      tile = MAXTWIDTH;
    }
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, argb_tile, yuvconstants, tile);
    ARGBToRGB565Row_SSE2(argb_tile, dst_rgb565, tile);
    dst_rgb565 += tile * 2;
    src_y += tile;
    src_u += tile / 2;
    src_v += tile / 2;
    remaining -= tile;
  }
}
#endif

#if defined(HAS_I422TOARGB1555ROW_SSSE3)
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
                             uint8* dst_argb1555,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Intermediate ARGB pixels for one tile.
  SIMD_ALIGNED(uint8 argb_tile[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    int tile = remaining;
    if (tile > MAXTWIDTH) {
      tile = MAXTWIDTH;
    }
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, argb_tile, yuvconstants, tile);
    ARGBToARGB1555Row_SSE2(argb_tile, dst_argb1555, tile);
    dst_argb1555 += tile * 2;
    src_y += tile;
    src_u += tile / 2;
    src_v += tile / 2;
    remaining -= tile;
  }
}
#endif

#if defined(HAS_I422TOARGB4444ROW_SSSE3)
void I422ToARGB4444Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
                             uint8* dst_argb4444,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Intermediate ARGB pixels for one tile.
  SIMD_ALIGNED(uint8 argb_tile[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    int tile = remaining;
    if (tile > MAXTWIDTH) {
      tile = MAXTWIDTH;
    }
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, argb_tile, yuvconstants, tile);
    ARGBToARGB4444Row_SSE2(argb_tile, dst_argb4444, tile);
    dst_argb4444 += tile * 2;
    src_y += tile;
    src_u += tile / 2;
    src_v += tile / 2;
    remaining -= tile;
  }
}
#endif

#if defined(HAS_NV12TORGB565ROW_SSSE3)
void NV12ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_uv,
                           uint8* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Intermediate ARGB pixels for one tile.
  SIMD_ALIGNED(uint8 argb_tile[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    int tile = remaining;
    if (tile > MAXTWIDTH) {
      tile = MAXTWIDTH;
    }
    NV12ToARGBRow_SSSE3(src_y, src_uv, argb_tile, yuvconstants, tile);
    ARGBToRGB565Row_SSE2(argb_tile, dst_rgb565, tile);
    dst_rgb565 += tile * 2;
    src_y += tile;
    src_uv += tile;
    remaining -= tile;
  }
}
#endif

#if defined(HAS_I422TORGB565ROW_AVX2)
void I422ToRGB565Row_AVX2(const uint8* src_y,
                          const uint8* src_u,
                          const uint8* src_v,
                          uint8* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Intermediate ARGB pixels for one tile.
  SIMD_ALIGNED32(uint8 argb_tile[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    int tile = remaining;
    if (tile > MAXTWIDTH) {
      tile = MAXTWIDTH;
    }
    I422ToARGBRow_AVX2(src_y, src_u, src_v, argb_tile, yuvconstants, tile);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(argb_tile, dst_rgb565, tile);
#else
    ARGBToRGB565Row_SSE2(argb_tile, dst_rgb565, tile);
#endif
    dst_rgb565 += tile * 2;
    src_y += tile;
    src_u += tile / 2;
    src_v += tile / 2;
    remaining -= tile;
  }
}
#endif

#if defined(HAS_I422TOARGB1555ROW_AVX2)
void I422ToARGB1555Row_AVX2(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb1555,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Intermediate ARGB pixels for one tile.
  SIMD_ALIGNED32(uint8 argb_tile[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    int tile = remaining;
    if (tile > MAXTWIDTH) {
      tile = MAXTWIDTH;
    }
    I422ToARGBRow_AVX2(src_y, src_u, src_v, argb_tile, yuvconstants, tile);
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
    ARGBToARGB1555Row_AVX2(argb_tile, dst_argb1555, tile);
#else
    ARGBToARGB1555Row_SSE2(argb_tile, dst_argb1555, tile);
#endif
    dst_argb1555 += tile * 2;
    src_y += tile;
    src_u += tile / 2;
    src_v += tile / 2;
    remaining -= tile;
  }
}
#endif

#if defined(HAS_I422TOARGB4444ROW_AVX2)
void I422ToARGB4444Row_AVX2(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb4444,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Intermediate ARGB pixels for one tile.
  SIMD_ALIGNED32(uint8 argb_tile[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    int tile = remaining;
    if (tile > MAXTWIDTH) {
      tile = MAXTWIDTH;
    }
    I422ToARGBRow_AVX2(src_y, src_u, src_v, argb_tile, yuvconstants, tile);
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
    ARGBToARGB4444Row_AVX2(argb_tile, dst_argb4444, tile);
#else
    ARGBToARGB4444Row_SSE2(argb_tile, dst_argb4444, tile);
#endif
    dst_argb4444 += tile * 2;
    src_y += tile;
    src_u += tile / 2;
    src_v += tile / 2;
    remaining -= tile;
  }
}
#endif

#if defined(HAS_I422TORGB24ROW_AVX2)
void I422ToRGB24Row_AVX2(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Intermediate ARGB pixels for one tile.
  SIMD_ALIGNED32(uint8 argb_tile[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    int tile = remaining;
    if (tile > MAXTWIDTH) {
      tile = MAXTWIDTH;
    }
    I422ToARGBRow_AVX2(src_y, src_u, src_v, argb_tile, yuvconstants, tile);
    // TODO(fbarchard): ARGBToRGB24Row_AVX2
    ARGBToRGB24Row_SSSE3(argb_tile, dst_rgb24, tile);
    dst_rgb24 += tile * 3;
    src_y += tile;
    src_u += tile / 2;
    src_v += tile / 2;
    remaining -= tile;
  }
}
#endif

#if defined(HAS_NV12TORGB565ROW_AVX2)
void NV12ToRGB565Row_AVX2(const uint8* src_y,
                          const uint8* src_uv,
                          uint8* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Intermediate ARGB pixels for one tile.
  SIMD_ALIGNED32(uint8 argb_tile[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    int tile = remaining;
    if (tile > MAXTWIDTH) {
      tile = MAXTWIDTH;
    }
    NV12ToARGBRow_AVX2(src_y, src_uv, argb_tile, yuvconstants, tile);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(argb_tile, dst_rgb565, tile);
#else
    ARGBToRGB565Row_SSE2(argb_tile, dst_rgb565, tile);
#endif
    dst_rgb565 += tile * 2;
    src_y += tile;
    src_uv += tile;
    remaining -= tile;
  }
}
#endif

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif