1 /* 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "libyuv/row.h" 12 13 #include <string.h> // For memcpy and memset. 14 15 #include "libyuv/basic_types.h" 16 17 #ifdef __cplusplus 18 namespace libyuv { 19 extern "C" { 20 #endif 21 22 // llvm x86 is poor at ternary operator, so use branchless min/max. 23 24 #define USE_BRANCHLESS 1 25 #if USE_BRANCHLESS 26 static __inline int32 clamp0(int32 v) { 27 return ((-(v) >> 31) & (v)); 28 } 29 30 static __inline int32 clamp255(int32 v) { 31 return (((255 - (v)) >> 31) | (v)) & 255; 32 } 33 34 static __inline uint32 Clamp(int32 val) { 35 int v = clamp0(val); 36 return (uint32)(clamp255(v)); 37 } 38 39 static __inline uint32 Abs(int32 v) { 40 int m = v >> 31; 41 return (v + m) ^ m; 42 } 43 #else // USE_BRANCHLESS 44 static __inline int32 clamp0(int32 v) { 45 return (v < 0) ? 0 : v; 46 } 47 48 static __inline int32 clamp255(int32 v) { 49 return (v > 255) ? 255 : v; 50 } 51 52 static __inline uint32 Clamp(int32 val) { 53 int v = clamp0(val); 54 return (uint32)(clamp255(v)); 55 } 56 57 static __inline uint32 Abs(int32 v) { 58 return (v < 0) ? 
-v : v; 59 } 60 #endif // USE_BRANCHLESS 61 62 #ifdef LIBYUV_LITTLE_ENDIAN 63 #define WRITEWORD(p, v) *(uint32*)(p) = v 64 #else 65 static inline void WRITEWORD(uint8* p, uint32 v) { 66 p[0] = (uint8)(v & 255); 67 p[1] = (uint8)((v >> 8) & 255); 68 p[2] = (uint8)((v >> 16) & 255); 69 p[3] = (uint8)((v >> 24) & 255); 70 } 71 #endif 72 73 void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) { 74 int x; 75 for (x = 0; x < width; ++x) { 76 uint8 b = src_rgb24[0]; 77 uint8 g = src_rgb24[1]; 78 uint8 r = src_rgb24[2]; 79 dst_argb[0] = b; 80 dst_argb[1] = g; 81 dst_argb[2] = r; 82 dst_argb[3] = 255u; 83 dst_argb += 4; 84 src_rgb24 += 3; 85 } 86 } 87 88 void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) { 89 int x; 90 for (x = 0; x < width; ++x) { 91 uint8 r = src_raw[0]; 92 uint8 g = src_raw[1]; 93 uint8 b = src_raw[2]; 94 dst_argb[0] = b; 95 dst_argb[1] = g; 96 dst_argb[2] = r; 97 dst_argb[3] = 255u; 98 dst_argb += 4; 99 src_raw += 3; 100 } 101 } 102 103 void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width) { 104 int x; 105 for (x = 0; x < width; ++x) { 106 uint8 r = src_raw[0]; 107 uint8 g = src_raw[1]; 108 uint8 b = src_raw[2]; 109 dst_rgb24[0] = b; 110 dst_rgb24[1] = g; 111 dst_rgb24[2] = r; 112 dst_rgb24 += 3; 113 src_raw += 3; 114 } 115 } 116 117 void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) { 118 int x; 119 for (x = 0; x < width; ++x) { 120 uint8 b = src_rgb565[0] & 0x1f; 121 uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); 122 uint8 r = src_rgb565[1] >> 3; 123 dst_argb[0] = (b << 3) | (b >> 2); 124 dst_argb[1] = (g << 2) | (g >> 4); 125 dst_argb[2] = (r << 3) | (r >> 2); 126 dst_argb[3] = 255u; 127 dst_argb += 4; 128 src_rgb565 += 2; 129 } 130 } 131 132 void ARGB1555ToARGBRow_C(const uint8* src_argb1555, 133 uint8* dst_argb, 134 int width) { 135 int x; 136 for (x = 0; x < width; ++x) { 137 uint8 b = src_argb1555[0] & 0x1f; 138 uint8 g = (src_argb1555[0] >> 5) | 
((src_argb1555[1] & 0x03) << 3); 139 uint8 r = (src_argb1555[1] & 0x7c) >> 2; 140 uint8 a = src_argb1555[1] >> 7; 141 dst_argb[0] = (b << 3) | (b >> 2); 142 dst_argb[1] = (g << 3) | (g >> 2); 143 dst_argb[2] = (r << 3) | (r >> 2); 144 dst_argb[3] = -a; 145 dst_argb += 4; 146 src_argb1555 += 2; 147 } 148 } 149 150 void ARGB4444ToARGBRow_C(const uint8* src_argb4444, 151 uint8* dst_argb, 152 int width) { 153 int x; 154 for (x = 0; x < width; ++x) { 155 uint8 b = src_argb4444[0] & 0x0f; 156 uint8 g = src_argb4444[0] >> 4; 157 uint8 r = src_argb4444[1] & 0x0f; 158 uint8 a = src_argb4444[1] >> 4; 159 dst_argb[0] = (b << 4) | b; 160 dst_argb[1] = (g << 4) | g; 161 dst_argb[2] = (r << 4) | r; 162 dst_argb[3] = (a << 4) | a; 163 dst_argb += 4; 164 src_argb4444 += 2; 165 } 166 } 167 168 void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 169 int x; 170 for (x = 0; x < width; ++x) { 171 uint8 b = src_argb[0]; 172 uint8 g = src_argb[1]; 173 uint8 r = src_argb[2]; 174 dst_rgb[0] = b; 175 dst_rgb[1] = g; 176 dst_rgb[2] = r; 177 dst_rgb += 3; 178 src_argb += 4; 179 } 180 } 181 182 void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) { 183 int x; 184 for (x = 0; x < width; ++x) { 185 uint8 b = src_argb[0]; 186 uint8 g = src_argb[1]; 187 uint8 r = src_argb[2]; 188 dst_rgb[0] = r; 189 dst_rgb[1] = g; 190 dst_rgb[2] = b; 191 dst_rgb += 3; 192 src_argb += 4; 193 } 194 } 195 196 void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 197 int x; 198 for (x = 0; x < width - 1; x += 2) { 199 uint8 b0 = src_argb[0] >> 3; 200 uint8 g0 = src_argb[1] >> 2; 201 uint8 r0 = src_argb[2] >> 3; 202 uint8 b1 = src_argb[4] >> 3; 203 uint8 g1 = src_argb[5] >> 2; 204 uint8 r1 = src_argb[6] >> 3; 205 WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | 206 (r1 << 27)); 207 dst_rgb += 4; 208 src_argb += 8; 209 } 210 if (width & 1) { 211 uint8 b0 = src_argb[0] >> 3; 212 uint8 g0 = src_argb[1] >> 2; 213 uint8 r0 = src_argb[2] 
>> 3; 214 *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); 215 } 216 } 217 218 // dither4 is a row of 4 values from 4x4 dither matrix. 219 // The 4x4 matrix contains values to increase RGB. When converting to 220 // fewer bits (565) this provides an ordered dither. 221 // The order in the 4x4 matrix in first byte is upper left. 222 // The 4 values are passed as an int, then referenced as an array, so 223 // endian will not affect order of the original matrix. But the dither4 224 // will containing the first pixel in the lower byte for little endian 225 // or the upper byte for big endian. 226 void ARGBToRGB565DitherRow_C(const uint8* src_argb, 227 uint8* dst_rgb, 228 const uint32 dither4, 229 int width) { 230 int x; 231 for (x = 0; x < width - 1; x += 2) { 232 int dither0 = ((const unsigned char*)(&dither4))[x & 3]; 233 int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3]; 234 uint8 b0 = clamp255(src_argb[0] + dither0) >> 3; 235 uint8 g0 = clamp255(src_argb[1] + dither0) >> 2; 236 uint8 r0 = clamp255(src_argb[2] + dither0) >> 3; 237 uint8 b1 = clamp255(src_argb[4] + dither1) >> 3; 238 uint8 g1 = clamp255(src_argb[5] + dither1) >> 2; 239 uint8 r1 = clamp255(src_argb[6] + dither1) >> 3; 240 WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | 241 (r1 << 27)); 242 dst_rgb += 4; 243 src_argb += 8; 244 } 245 if (width & 1) { 246 int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3]; 247 uint8 b0 = clamp255(src_argb[0] + dither0) >> 3; 248 uint8 g0 = clamp255(src_argb[1] + dither0) >> 2; 249 uint8 r0 = clamp255(src_argb[2] + dither0) >> 3; 250 *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); 251 } 252 } 253 254 void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 255 int x; 256 for (x = 0; x < width - 1; x += 2) { 257 uint8 b0 = src_argb[0] >> 3; 258 uint8 g0 = src_argb[1] >> 3; 259 uint8 r0 = src_argb[2] >> 3; 260 uint8 a0 = src_argb[3] >> 7; 261 uint8 b1 = src_argb[4] >> 3; 262 uint8 g1 = 
src_argb[5] >> 3; 263 uint8 r1 = src_argb[6] >> 3; 264 uint8 a1 = src_argb[7] >> 7; 265 *(uint32*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) | 266 (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31); 267 dst_rgb += 4; 268 src_argb += 8; 269 } 270 if (width & 1) { 271 uint8 b0 = src_argb[0] >> 3; 272 uint8 g0 = src_argb[1] >> 3; 273 uint8 r0 = src_argb[2] >> 3; 274 uint8 a0 = src_argb[3] >> 7; 275 *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); 276 } 277 } 278 279 void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 280 int x; 281 for (x = 0; x < width - 1; x += 2) { 282 uint8 b0 = src_argb[0] >> 4; 283 uint8 g0 = src_argb[1] >> 4; 284 uint8 r0 = src_argb[2] >> 4; 285 uint8 a0 = src_argb[3] >> 4; 286 uint8 b1 = src_argb[4] >> 4; 287 uint8 g1 = src_argb[5] >> 4; 288 uint8 r1 = src_argb[6] >> 4; 289 uint8 a1 = src_argb[7] >> 4; 290 *(uint32*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) | (b1 << 16) | 291 (g1 << 20) | (r1 << 24) | (a1 << 28); 292 dst_rgb += 4; 293 src_argb += 8; 294 } 295 if (width & 1) { 296 uint8 b0 = src_argb[0] >> 4; 297 uint8 g0 = src_argb[1] >> 4; 298 uint8 r0 = src_argb[2] >> 4; 299 uint8 a0 = src_argb[3] >> 4; 300 *(uint16*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); 301 } 302 } 303 304 static __inline int RGBToY(uint8 r, uint8 g, uint8 b) { 305 return (66 * r + 129 * g + 25 * b + 0x1080) >> 8; 306 } 307 308 static __inline int RGBToU(uint8 r, uint8 g, uint8 b) { 309 return (112 * b - 74 * g - 38 * r + 0x8080) >> 8; 310 } 311 static __inline int RGBToV(uint8 r, uint8 g, uint8 b) { 312 return (112 * r - 94 * g - 18 * b + 0x8080) >> 8; 313 } 314 315 // ARGBToY_C and ARGBToUV_C 316 #define MAKEROWY(NAME, R, G, B, BPP) \ 317 void NAME##ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ 318 int x; \ 319 for (x = 0; x < width; ++x) { \ 320 dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \ 321 src_argb0 += BPP; \ 322 dst_y += 1; \ 323 } \ 324 } \ 325 void 
NAME##ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \ 326 uint8* dst_u, uint8* dst_v, int width) { \ 327 const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ 328 int x; \ 329 for (x = 0; x < width - 1; x += 2) { \ 330 uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \ 331 src_rgb1[B + BPP]) >> \ 332 2; \ 333 uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \ 334 src_rgb1[G + BPP]) >> \ 335 2; \ 336 uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \ 337 src_rgb1[R + BPP]) >> \ 338 2; \ 339 dst_u[0] = RGBToU(ar, ag, ab); \ 340 dst_v[0] = RGBToV(ar, ag, ab); \ 341 src_rgb0 += BPP * 2; \ 342 src_rgb1 += BPP * 2; \ 343 dst_u += 1; \ 344 dst_v += 1; \ 345 } \ 346 if (width & 1) { \ 347 uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \ 348 uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \ 349 uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \ 350 dst_u[0] = RGBToU(ar, ag, ab); \ 351 dst_v[0] = RGBToV(ar, ag, ab); \ 352 } \ 353 } 354 355 MAKEROWY(ARGB, 2, 1, 0, 4) 356 MAKEROWY(BGRA, 1, 2, 3, 4) 357 MAKEROWY(ABGR, 0, 1, 2, 4) 358 MAKEROWY(RGBA, 3, 2, 1, 4) 359 MAKEROWY(RGB24, 2, 1, 0, 3) 360 MAKEROWY(RAW, 0, 1, 2, 3) 361 #undef MAKEROWY 362 363 // JPeg uses a variation on BT.601-1 full range 364 // y = 0.29900 * r + 0.58700 * g + 0.11400 * b 365 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center 366 // v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center 367 // BT.601 Mpeg range uses: 368 // b 0.1016 * 255 = 25.908 = 25 369 // g 0.5078 * 255 = 129.489 = 129 370 // r 0.2578 * 255 = 65.739 = 66 371 // JPeg 8 bit Y (not used): 372 // b 0.11400 * 256 = 29.184 = 29 373 // g 0.58700 * 256 = 150.272 = 150 374 // r 0.29900 * 256 = 76.544 = 77 375 // JPeg 7 bit Y: 376 // b 0.11400 * 128 = 14.592 = 15 377 // g 0.58700 * 128 = 75.136 = 75 378 // r 0.29900 * 128 = 38.272 = 38 379 // JPeg 8 bit U: 380 // b 0.50000 * 255 = 127.5 = 127 381 // g -0.33126 * 255 = -84.4713 = -84 382 // r -0.16874 * 255 = -43.0287 = -43 383 // JPeg 8 bit V: 384 // b 
-0.08131 * 255 = -20.73405 = -20 385 // g -0.41869 * 255 = -106.76595 = -107 386 // r 0.50000 * 255 = 127.5 = 127 387 388 static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) { 389 return (38 * r + 75 * g + 15 * b + 64) >> 7; 390 } 391 392 static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) { 393 return (127 * b - 84 * g - 43 * r + 0x8080) >> 8; 394 } 395 static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) { 396 return (127 * r - 107 * g - 20 * b + 0x8080) >> 8; 397 } 398 399 #define AVGB(a, b) (((a) + (b) + 1) >> 1) 400 401 // ARGBToYJ_C and ARGBToUVJ_C 402 #define MAKEROWYJ(NAME, R, G, B, BPP) \ 403 void NAME##ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ 404 int x; \ 405 for (x = 0; x < width; ++x) { \ 406 dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \ 407 src_argb0 += BPP; \ 408 dst_y += 1; \ 409 } \ 410 } \ 411 void NAME##ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \ 412 uint8* dst_u, uint8* dst_v, int width) { \ 413 const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ 414 int x; \ 415 for (x = 0; x < width - 1; x += 2) { \ 416 uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \ 417 AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \ 418 uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \ 419 AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \ 420 uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \ 421 AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \ 422 dst_u[0] = RGBToUJ(ar, ag, ab); \ 423 dst_v[0] = RGBToVJ(ar, ag, ab); \ 424 src_rgb0 += BPP * 2; \ 425 src_rgb1 += BPP * 2; \ 426 dst_u += 1; \ 427 dst_v += 1; \ 428 } \ 429 if (width & 1) { \ 430 uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]); \ 431 uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]); \ 432 uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]); \ 433 dst_u[0] = RGBToUJ(ar, ag, ab); \ 434 dst_v[0] = RGBToVJ(ar, ag, ab); \ 435 } \ 436 } 437 438 MAKEROWYJ(ARGB, 2, 1, 0, 4) 439 #undef MAKEROWYJ 440 441 void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) { 442 int x; 443 
for (x = 0; x < width; ++x) { 444 uint8 b = src_rgb565[0] & 0x1f; 445 uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); 446 uint8 r = src_rgb565[1] >> 3; 447 b = (b << 3) | (b >> 2); 448 g = (g << 2) | (g >> 4); 449 r = (r << 3) | (r >> 2); 450 dst_y[0] = RGBToY(r, g, b); 451 src_rgb565 += 2; 452 dst_y += 1; 453 } 454 } 455 456 void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) { 457 int x; 458 for (x = 0; x < width; ++x) { 459 uint8 b = src_argb1555[0] & 0x1f; 460 uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); 461 uint8 r = (src_argb1555[1] & 0x7c) >> 2; 462 b = (b << 3) | (b >> 2); 463 g = (g << 3) | (g >> 2); 464 r = (r << 3) | (r >> 2); 465 dst_y[0] = RGBToY(r, g, b); 466 src_argb1555 += 2; 467 dst_y += 1; 468 } 469 } 470 471 void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) { 472 int x; 473 for (x = 0; x < width; ++x) { 474 uint8 b = src_argb4444[0] & 0x0f; 475 uint8 g = src_argb4444[0] >> 4; 476 uint8 r = src_argb4444[1] & 0x0f; 477 b = (b << 4) | b; 478 g = (g << 4) | g; 479 r = (r << 4) | r; 480 dst_y[0] = RGBToY(r, g, b); 481 src_argb4444 += 2; 482 dst_y += 1; 483 } 484 } 485 486 void RGB565ToUVRow_C(const uint8* src_rgb565, 487 int src_stride_rgb565, 488 uint8* dst_u, 489 uint8* dst_v, 490 int width) { 491 const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565; 492 int x; 493 for (x = 0; x < width - 1; x += 2) { 494 uint8 b0 = src_rgb565[0] & 0x1f; 495 uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); 496 uint8 r0 = src_rgb565[1] >> 3; 497 uint8 b1 = src_rgb565[2] & 0x1f; 498 uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3); 499 uint8 r1 = src_rgb565[3] >> 3; 500 uint8 b2 = next_rgb565[0] & 0x1f; 501 uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); 502 uint8 r2 = next_rgb565[1] >> 3; 503 uint8 b3 = next_rgb565[2] & 0x1f; 504 uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3); 505 uint8 r3 = next_rgb565[3] >> 
3; 506 uint8 b = (b0 + b1 + b2 + b3); // 565 * 4 = 787. 507 uint8 g = (g0 + g1 + g2 + g3); 508 uint8 r = (r0 + r1 + r2 + r3); 509 b = (b << 1) | (b >> 6); // 787 -> 888. 510 r = (r << 1) | (r >> 6); 511 dst_u[0] = RGBToU(r, g, b); 512 dst_v[0] = RGBToV(r, g, b); 513 src_rgb565 += 4; 514 next_rgb565 += 4; 515 dst_u += 1; 516 dst_v += 1; 517 } 518 if (width & 1) { 519 uint8 b0 = src_rgb565[0] & 0x1f; 520 uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); 521 uint8 r0 = src_rgb565[1] >> 3; 522 uint8 b2 = next_rgb565[0] & 0x1f; 523 uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); 524 uint8 r2 = next_rgb565[1] >> 3; 525 uint8 b = (b0 + b2); // 565 * 2 = 676. 526 uint8 g = (g0 + g2); 527 uint8 r = (r0 + r2); 528 b = (b << 2) | (b >> 4); // 676 -> 888 529 g = (g << 1) | (g >> 6); 530 r = (r << 2) | (r >> 4); 531 dst_u[0] = RGBToU(r, g, b); 532 dst_v[0] = RGBToV(r, g, b); 533 } 534 } 535 536 void ARGB1555ToUVRow_C(const uint8* src_argb1555, 537 int src_stride_argb1555, 538 uint8* dst_u, 539 uint8* dst_v, 540 int width) { 541 const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555; 542 int x; 543 for (x = 0; x < width - 1; x += 2) { 544 uint8 b0 = src_argb1555[0] & 0x1f; 545 uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); 546 uint8 r0 = (src_argb1555[1] & 0x7c) >> 2; 547 uint8 b1 = src_argb1555[2] & 0x1f; 548 uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3); 549 uint8 r1 = (src_argb1555[3] & 0x7c) >> 2; 550 uint8 b2 = next_argb1555[0] & 0x1f; 551 uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); 552 uint8 r2 = (next_argb1555[1] & 0x7c) >> 2; 553 uint8 b3 = next_argb1555[2] & 0x1f; 554 uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3); 555 uint8 r3 = (next_argb1555[3] & 0x7c) >> 2; 556 uint8 b = (b0 + b1 + b2 + b3); // 555 * 4 = 777. 557 uint8 g = (g0 + g1 + g2 + g3); 558 uint8 r = (r0 + r1 + r2 + r3); 559 b = (b << 1) | (b >> 6); // 777 -> 888. 
560 g = (g << 1) | (g >> 6); 561 r = (r << 1) | (r >> 6); 562 dst_u[0] = RGBToU(r, g, b); 563 dst_v[0] = RGBToV(r, g, b); 564 src_argb1555 += 4; 565 next_argb1555 += 4; 566 dst_u += 1; 567 dst_v += 1; 568 } 569 if (width & 1) { 570 uint8 b0 = src_argb1555[0] & 0x1f; 571 uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); 572 uint8 r0 = (src_argb1555[1] & 0x7c) >> 2; 573 uint8 b2 = next_argb1555[0] & 0x1f; 574 uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); 575 uint8 r2 = next_argb1555[1] >> 3; 576 uint8 b = (b0 + b2); // 555 * 2 = 666. 577 uint8 g = (g0 + g2); 578 uint8 r = (r0 + r2); 579 b = (b << 2) | (b >> 4); // 666 -> 888. 580 g = (g << 2) | (g >> 4); 581 r = (r << 2) | (r >> 4); 582 dst_u[0] = RGBToU(r, g, b); 583 dst_v[0] = RGBToV(r, g, b); 584 } 585 } 586 587 void ARGB4444ToUVRow_C(const uint8* src_argb4444, 588 int src_stride_argb4444, 589 uint8* dst_u, 590 uint8* dst_v, 591 int width) { 592 const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444; 593 int x; 594 for (x = 0; x < width - 1; x += 2) { 595 uint8 b0 = src_argb4444[0] & 0x0f; 596 uint8 g0 = src_argb4444[0] >> 4; 597 uint8 r0 = src_argb4444[1] & 0x0f; 598 uint8 b1 = src_argb4444[2] & 0x0f; 599 uint8 g1 = src_argb4444[2] >> 4; 600 uint8 r1 = src_argb4444[3] & 0x0f; 601 uint8 b2 = next_argb4444[0] & 0x0f; 602 uint8 g2 = next_argb4444[0] >> 4; 603 uint8 r2 = next_argb4444[1] & 0x0f; 604 uint8 b3 = next_argb4444[2] & 0x0f; 605 uint8 g3 = next_argb4444[2] >> 4; 606 uint8 r3 = next_argb4444[3] & 0x0f; 607 uint8 b = (b0 + b1 + b2 + b3); // 444 * 4 = 666. 608 uint8 g = (g0 + g1 + g2 + g3); 609 uint8 r = (r0 + r1 + r2 + r3); 610 b = (b << 2) | (b >> 4); // 666 -> 888. 
611 g = (g << 2) | (g >> 4); 612 r = (r << 2) | (r >> 4); 613 dst_u[0] = RGBToU(r, g, b); 614 dst_v[0] = RGBToV(r, g, b); 615 src_argb4444 += 4; 616 next_argb4444 += 4; 617 dst_u += 1; 618 dst_v += 1; 619 } 620 if (width & 1) { 621 uint8 b0 = src_argb4444[0] & 0x0f; 622 uint8 g0 = src_argb4444[0] >> 4; 623 uint8 r0 = src_argb4444[1] & 0x0f; 624 uint8 b2 = next_argb4444[0] & 0x0f; 625 uint8 g2 = next_argb4444[0] >> 4; 626 uint8 r2 = next_argb4444[1] & 0x0f; 627 uint8 b = (b0 + b2); // 444 * 2 = 555. 628 uint8 g = (g0 + g2); 629 uint8 r = (r0 + r2); 630 b = (b << 3) | (b >> 2); // 555 -> 888. 631 g = (g << 3) | (g >> 2); 632 r = (r << 3) | (r >> 2); 633 dst_u[0] = RGBToU(r, g, b); 634 dst_v[0] = RGBToV(r, g, b); 635 } 636 } 637 638 void ARGBToUV444Row_C(const uint8* src_argb, 639 uint8* dst_u, 640 uint8* dst_v, 641 int width) { 642 int x; 643 for (x = 0; x < width; ++x) { 644 uint8 ab = src_argb[0]; 645 uint8 ag = src_argb[1]; 646 uint8 ar = src_argb[2]; 647 dst_u[0] = RGBToU(ar, ag, ab); 648 dst_v[0] = RGBToV(ar, ag, ab); 649 src_argb += 4; 650 dst_u += 1; 651 dst_v += 1; 652 } 653 } 654 655 void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) { 656 int x; 657 for (x = 0; x < width; ++x) { 658 uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]); 659 dst_argb[2] = dst_argb[1] = dst_argb[0] = y; 660 dst_argb[3] = src_argb[3]; 661 dst_argb += 4; 662 src_argb += 4; 663 } 664 } 665 666 // Convert a row of image to Sepia tone. 667 void ARGBSepiaRow_C(uint8* dst_argb, int width) { 668 int x; 669 for (x = 0; x < width; ++x) { 670 int b = dst_argb[0]; 671 int g = dst_argb[1]; 672 int r = dst_argb[2]; 673 int sb = (b * 17 + g * 68 + r * 35) >> 7; 674 int sg = (b * 22 + g * 88 + r * 45) >> 7; 675 int sr = (b * 24 + g * 98 + r * 50) >> 7; 676 // b does not over flow. a is preserved from original. 
677 dst_argb[0] = sb; 678 dst_argb[1] = clamp255(sg); 679 dst_argb[2] = clamp255(sr); 680 dst_argb += 4; 681 } 682 } 683 684 // Apply color matrix to a row of image. Matrix is signed. 685 // TODO(fbarchard): Consider adding rounding (+32). 686 void ARGBColorMatrixRow_C(const uint8* src_argb, 687 uint8* dst_argb, 688 const int8* matrix_argb, 689 int width) { 690 int x; 691 for (x = 0; x < width; ++x) { 692 int b = src_argb[0]; 693 int g = src_argb[1]; 694 int r = src_argb[2]; 695 int a = src_argb[3]; 696 int sb = (b * matrix_argb[0] + g * matrix_argb[1] + r * matrix_argb[2] + 697 a * matrix_argb[3]) >> 698 6; 699 int sg = (b * matrix_argb[4] + g * matrix_argb[5] + r * matrix_argb[6] + 700 a * matrix_argb[7]) >> 701 6; 702 int sr = (b * matrix_argb[8] + g * matrix_argb[9] + r * matrix_argb[10] + 703 a * matrix_argb[11]) >> 704 6; 705 int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] + 706 a * matrix_argb[15]) >> 707 6; 708 dst_argb[0] = Clamp(sb); 709 dst_argb[1] = Clamp(sg); 710 dst_argb[2] = Clamp(sr); 711 dst_argb[3] = Clamp(sa); 712 src_argb += 4; 713 dst_argb += 4; 714 } 715 } 716 717 // Apply color table to a row of image. 718 void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { 719 int x; 720 for (x = 0; x < width; ++x) { 721 int b = dst_argb[0]; 722 int g = dst_argb[1]; 723 int r = dst_argb[2]; 724 int a = dst_argb[3]; 725 dst_argb[0] = table_argb[b * 4 + 0]; 726 dst_argb[1] = table_argb[g * 4 + 1]; 727 dst_argb[2] = table_argb[r * 4 + 2]; 728 dst_argb[3] = table_argb[a * 4 + 3]; 729 dst_argb += 4; 730 } 731 } 732 733 // Apply color table to a row of image. 
734 void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { 735 int x; 736 for (x = 0; x < width; ++x) { 737 int b = dst_argb[0]; 738 int g = dst_argb[1]; 739 int r = dst_argb[2]; 740 dst_argb[0] = table_argb[b * 4 + 0]; 741 dst_argb[1] = table_argb[g * 4 + 1]; 742 dst_argb[2] = table_argb[r * 4 + 2]; 743 dst_argb += 4; 744 } 745 } 746 747 void ARGBQuantizeRow_C(uint8* dst_argb, 748 int scale, 749 int interval_size, 750 int interval_offset, 751 int width) { 752 int x; 753 for (x = 0; x < width; ++x) { 754 int b = dst_argb[0]; 755 int g = dst_argb[1]; 756 int r = dst_argb[2]; 757 dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; 758 dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset; 759 dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset; 760 dst_argb += 4; 761 } 762 } 763 764 #define REPEAT8(v) (v) | ((v) << 8) 765 #define SHADE(f, v) v* f >> 24 766 767 void ARGBShadeRow_C(const uint8* src_argb, 768 uint8* dst_argb, 769 int width, 770 uint32 value) { 771 const uint32 b_scale = REPEAT8(value & 0xff); 772 const uint32 g_scale = REPEAT8((value >> 8) & 0xff); 773 const uint32 r_scale = REPEAT8((value >> 16) & 0xff); 774 const uint32 a_scale = REPEAT8(value >> 24); 775 776 int i; 777 for (i = 0; i < width; ++i) { 778 const uint32 b = REPEAT8(src_argb[0]); 779 const uint32 g = REPEAT8(src_argb[1]); 780 const uint32 r = REPEAT8(src_argb[2]); 781 const uint32 a = REPEAT8(src_argb[3]); 782 dst_argb[0] = SHADE(b, b_scale); 783 dst_argb[1] = SHADE(g, g_scale); 784 dst_argb[2] = SHADE(r, r_scale); 785 dst_argb[3] = SHADE(a, a_scale); 786 src_argb += 4; 787 dst_argb += 4; 788 } 789 } 790 #undef REPEAT8 791 #undef SHADE 792 793 #define REPEAT8(v) (v) | ((v) << 8) 794 #define SHADE(f, v) v* f >> 16 795 796 void ARGBMultiplyRow_C(const uint8* src_argb0, 797 const uint8* src_argb1, 798 uint8* dst_argb, 799 int width) { 800 int i; 801 for (i = 0; i < width; ++i) { 802 const uint32 b = REPEAT8(src_argb0[0]); 803 
const uint32 g = REPEAT8(src_argb0[1]); 804 const uint32 r = REPEAT8(src_argb0[2]); 805 const uint32 a = REPEAT8(src_argb0[3]); 806 const uint32 b_scale = src_argb1[0]; 807 const uint32 g_scale = src_argb1[1]; 808 const uint32 r_scale = src_argb1[2]; 809 const uint32 a_scale = src_argb1[3]; 810 dst_argb[0] = SHADE(b, b_scale); 811 dst_argb[1] = SHADE(g, g_scale); 812 dst_argb[2] = SHADE(r, r_scale); 813 dst_argb[3] = SHADE(a, a_scale); 814 src_argb0 += 4; 815 src_argb1 += 4; 816 dst_argb += 4; 817 } 818 } 819 #undef REPEAT8 820 #undef SHADE 821 822 #define SHADE(f, v) clamp255(v + f) 823 824 void ARGBAddRow_C(const uint8* src_argb0, 825 const uint8* src_argb1, 826 uint8* dst_argb, 827 int width) { 828 int i; 829 for (i = 0; i < width; ++i) { 830 const int b = src_argb0[0]; 831 const int g = src_argb0[1]; 832 const int r = src_argb0[2]; 833 const int a = src_argb0[3]; 834 const int b_add = src_argb1[0]; 835 const int g_add = src_argb1[1]; 836 const int r_add = src_argb1[2]; 837 const int a_add = src_argb1[3]; 838 dst_argb[0] = SHADE(b, b_add); 839 dst_argb[1] = SHADE(g, g_add); 840 dst_argb[2] = SHADE(r, r_add); 841 dst_argb[3] = SHADE(a, a_add); 842 src_argb0 += 4; 843 src_argb1 += 4; 844 dst_argb += 4; 845 } 846 } 847 #undef SHADE 848 849 #define SHADE(f, v) clamp0(f - v) 850 851 void ARGBSubtractRow_C(const uint8* src_argb0, 852 const uint8* src_argb1, 853 uint8* dst_argb, 854 int width) { 855 int i; 856 for (i = 0; i < width; ++i) { 857 const int b = src_argb0[0]; 858 const int g = src_argb0[1]; 859 const int r = src_argb0[2]; 860 const int a = src_argb0[3]; 861 const int b_sub = src_argb1[0]; 862 const int g_sub = src_argb1[1]; 863 const int r_sub = src_argb1[2]; 864 const int a_sub = src_argb1[3]; 865 dst_argb[0] = SHADE(b, b_sub); 866 dst_argb[1] = SHADE(g, g_sub); 867 dst_argb[2] = SHADE(r, r_sub); 868 dst_argb[3] = SHADE(a, a_sub); 869 src_argb0 += 4; 870 src_argb1 += 4; 871 dst_argb += 4; 872 } 873 } 874 #undef SHADE 875 876 // Sobel functions which mimics 
SSSE3. 877 void SobelXRow_C(const uint8* src_y0, 878 const uint8* src_y1, 879 const uint8* src_y2, 880 uint8* dst_sobelx, 881 int width) { 882 int i; 883 for (i = 0; i < width; ++i) { 884 int a = src_y0[i]; 885 int b = src_y1[i]; 886 int c = src_y2[i]; 887 int a_sub = src_y0[i + 2]; 888 int b_sub = src_y1[i + 2]; 889 int c_sub = src_y2[i + 2]; 890 int a_diff = a - a_sub; 891 int b_diff = b - b_sub; 892 int c_diff = c - c_sub; 893 int sobel = Abs(a_diff + b_diff * 2 + c_diff); 894 dst_sobelx[i] = (uint8)(clamp255(sobel)); 895 } 896 } 897 898 void SobelYRow_C(const uint8* src_y0, 899 const uint8* src_y1, 900 uint8* dst_sobely, 901 int width) { 902 int i; 903 for (i = 0; i < width; ++i) { 904 int a = src_y0[i + 0]; 905 int b = src_y0[i + 1]; 906 int c = src_y0[i + 2]; 907 int a_sub = src_y1[i + 0]; 908 int b_sub = src_y1[i + 1]; 909 int c_sub = src_y1[i + 2]; 910 int a_diff = a - a_sub; 911 int b_diff = b - b_sub; 912 int c_diff = c - c_sub; 913 int sobel = Abs(a_diff + b_diff * 2 + c_diff); 914 dst_sobely[i] = (uint8)(clamp255(sobel)); 915 } 916 } 917 918 void SobelRow_C(const uint8* src_sobelx, 919 const uint8* src_sobely, 920 uint8* dst_argb, 921 int width) { 922 int i; 923 for (i = 0; i < width; ++i) { 924 int r = src_sobelx[i]; 925 int b = src_sobely[i]; 926 int s = clamp255(r + b); 927 dst_argb[0] = (uint8)(s); 928 dst_argb[1] = (uint8)(s); 929 dst_argb[2] = (uint8)(s); 930 dst_argb[3] = (uint8)(255u); 931 dst_argb += 4; 932 } 933 } 934 935 void SobelToPlaneRow_C(const uint8* src_sobelx, 936 const uint8* src_sobely, 937 uint8* dst_y, 938 int width) { 939 int i; 940 for (i = 0; i < width; ++i) { 941 int r = src_sobelx[i]; 942 int b = src_sobely[i]; 943 int s = clamp255(r + b); 944 dst_y[i] = (uint8)(s); 945 } 946 } 947 948 void SobelXYRow_C(const uint8* src_sobelx, 949 const uint8* src_sobely, 950 uint8* dst_argb, 951 int width) { 952 int i; 953 for (i = 0; i < width; ++i) { 954 int r = src_sobelx[i]; 955 int b = src_sobely[i]; 956 int g = clamp255(r + b); 957 
dst_argb[0] = (uint8)(b); 958 dst_argb[1] = (uint8)(g); 959 dst_argb[2] = (uint8)(r); 960 dst_argb[3] = (uint8)(255u); 961 dst_argb += 4; 962 } 963 } 964 965 void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) { 966 // Copy a Y to RGB. 967 int x; 968 for (x = 0; x < width; ++x) { 969 uint8 y = src_y[0]; 970 dst_argb[2] = dst_argb[1] = dst_argb[0] = y; 971 dst_argb[3] = 255u; 972 dst_argb += 4; 973 ++src_y; 974 } 975 } 976 977 // TODO(fbarchard): Unify these structures to be platform independent. 978 // TODO(fbarchard): Generate SIMD structures from float matrix. 979 980 // BT.601 YUV to RGB reference 981 // R = (Y - 16) * 1.164 - V * -1.596 982 // G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813 983 // B = (Y - 16) * 1.164 - U * -2.018 984 985 // Y contribution to R,G,B. Scale and bias. 986 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ 987 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ 988 989 // U and V contributions to R,G,B. 990 #define UB -128 /* max(-128, round(-2.018 * 64)) */ 991 #define UG 25 /* round(0.391 * 64) */ 992 #define VG 52 /* round(0.813 * 64) */ 993 #define VR -102 /* round(-1.596 * 64) */ 994 995 // Bias values to subtract 16 from Y and 128 from U and V. 
// Per-channel bias terms: each combines the chroma coefficient(s) for that
// channel multiplied by 128 with the Y bias YGB.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

// BT.601 constant tables.  The initializer layout differs per architecture
// and must match struct YuvConstants, which is declared outside this file
// section (presumably in libyuv/row.h - confirm there before reordering).
// In every branch the Yvu table holds the same values as the Yuv table with
// the U and V coefficients, and the B and R biases, exchanged.
#if defined(__aarch64__)  // 64 bit arm
// Negated UB/VR pairs, UG/VG pairs, then {BB, BG, BR} and 0x0101 * YG.
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#elif defined(__arm__)  // 32 bit arm
// Four negated UB then four negated VR (zero padded), four UG then four VG
// (zero padded), then {BB, BG, BR} and 0x0101 * YG.
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#else
// All other targets: three rows of 32 interleaved chroma coefficients
// (UB/0, UG/VG, 0/VR), then 16 copies each of BB, BG, BR and YG.
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
#endif

// The coefficient macros are reused with different values for the next
// colour space, so they are undefined here.
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG

// JPEG YUV to RGB reference
// *  R = Y                - V * -1.40200
// *  G = Y - U *  0.34414 - V *  0.71414
// *  B = Y - U * -1.77200

// Y contribution to R,G,B.  Scale and bias.
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
#define YGB 32 /* 64 / 2 */

// U and V contributions to R,G,B.
#define UB -113 /* round(-1.77200 * 64) */
#define UG 22 /* round(0.34414 * 64) */
#define VG 46 /* round(0.71414 * 64) */
#define VR -90 /* round(-1.40200 * 64) */

// Bias values to round, and subtract 128 from U and V.
// Per-channel bias terms: the chroma coefficients multiplied by 128 plus the
// luma bias/rounding term YGB.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

// JPEG (full range) conversion tables, in the same three per-architecture
// layouts used for the other colour matrices in this file.
// NOTE(review): the row order must match struct YuvConstants, which is
// declared outside this file section - confirm against the header.
// kYuv*: constants for Y,U,V input.
// kYvu*: the same coefficients with U and V (and the B and R terms) exchanged.
#if defined(__aarch64__)
// U/V coefficients are stored negated and interleaved per pair.
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#elif defined(__arm__)
// U/V coefficients are stored negated, grouped four at a time, zero padded.
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#else
// Default layout: every coefficient is replicated across the whole row, with
// zeros in the slots of the chroma channel that does not contribute.
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
#endif

// Release the coefficient macro names so the next colour matrix can redefine
// them.
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG

// BT.709 YUV to RGB reference
//  R = (Y - 16) * 1.164              - V * -1.793
//  G = (Y - 16) * 1.164 - U *  0.213 - V *  0.533
//  B = (Y - 16) * 1.164 - U * -2.112
// See also http://www.equasys.de/colorconversion.html

// Y contribution to R,G,B.  Scale and bias.
#define YG 18997  /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

// TODO(fbarchard): Find way to express 2.112 instead of 2.0.
// U and V contributions to R,G,B.
// UB is limited to -128 (i.e. -2.0 in 6-bit fixed point), see the TODO above.
#define UB -128 /* max(-128, round(-2.112 * 64)) */
#define UG 14   /* round(0.213 * 64) */
#define VG 34   /* round(0.533 * 64) */
#define VR -115 /* round(-1.793 * 64) */

// Bias values to round, and subtract 128 from U and V.
// Per-channel bias terms: the chroma coefficients multiplied by 128 plus the
// luma bias/rounding term YGB.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

// BT.709 conversion tables, in the same three per-architecture layouts used
// for the other colour matrices in this file.
// NOTE(review): the row order must match struct YuvConstants, which is
// declared outside this file section - confirm against the header.
// kYuv*: constants for Y,U,V input.
// kYvu*: the same coefficients with U and V (and the B and R terms) exchanged.
#if defined(__aarch64__)
// U/V coefficients are stored negated and interleaved per pair.
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#elif defined(__arm__)
// U/V coefficients are stored negated, grouped four at a time, zero padded.
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#else
// Default layout: every coefficient is replicated across the whole row, with
// zeros in the slots of the chroma channel that does not contribute.
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
#endif

// Release the coefficient macro names; YG and YGB are redefined further down
// for the Y-only conversion.
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG

// C reference code that mimics the YUV assembly.
1226 static __inline void YuvPixel(uint8 y, 1227 uint8 u, 1228 uint8 v, 1229 uint8* b, 1230 uint8* g, 1231 uint8* r, 1232 const struct YuvConstants* yuvconstants) { 1233 #if defined(__aarch64__) 1234 int ub = -yuvconstants->kUVToRB[0]; 1235 int ug = yuvconstants->kUVToG[0]; 1236 int vg = yuvconstants->kUVToG[1]; 1237 int vr = -yuvconstants->kUVToRB[1]; 1238 int bb = yuvconstants->kUVBiasBGR[0]; 1239 int bg = yuvconstants->kUVBiasBGR[1]; 1240 int br = yuvconstants->kUVBiasBGR[2]; 1241 int yg = yuvconstants->kYToRgb[0] / 0x0101; 1242 #elif defined(__arm__) 1243 int ub = -yuvconstants->kUVToRB[0]; 1244 int ug = yuvconstants->kUVToG[0]; 1245 int vg = yuvconstants->kUVToG[4]; 1246 int vr = -yuvconstants->kUVToRB[4]; 1247 int bb = yuvconstants->kUVBiasBGR[0]; 1248 int bg = yuvconstants->kUVBiasBGR[1]; 1249 int br = yuvconstants->kUVBiasBGR[2]; 1250 int yg = yuvconstants->kYToRgb[0] / 0x0101; 1251 #else 1252 int ub = yuvconstants->kUVToB[0]; 1253 int ug = yuvconstants->kUVToG[0]; 1254 int vg = yuvconstants->kUVToG[1]; 1255 int vr = yuvconstants->kUVToR[1]; 1256 int bb = yuvconstants->kUVBiasB[0]; 1257 int bg = yuvconstants->kUVBiasG[0]; 1258 int br = yuvconstants->kUVBiasR[0]; 1259 int yg = yuvconstants->kYToRgb[0]; 1260 #endif 1261 1262 uint32 y1 = (uint32)(y * 0x0101 * yg) >> 16; 1263 *b = Clamp((int32)(-(u * ub) + y1 + bb) >> 6); 1264 *g = Clamp((int32)(-(u * ug + v * vg) + y1 + bg) >> 6); 1265 *r = Clamp((int32)(-(v * vr) + y1 + br) >> 6); 1266 } 1267 1268 // Y contribution to R,G,B. Scale and bias. 1269 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ 1270 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ 1271 1272 // C reference code that mimics the YUV assembly. 
1273 static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) { 1274 uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16; 1275 *b = Clamp((int32)(y1 + YGB) >> 6); 1276 *g = Clamp((int32)(y1 + YGB) >> 6); 1277 *r = Clamp((int32)(y1 + YGB) >> 6); 1278 } 1279 1280 #undef YG 1281 #undef YGB 1282 1283 #if !defined(LIBYUV_DISABLE_NEON) && \ 1284 (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON)) 1285 // C mimic assembly. 1286 // TODO(fbarchard): Remove subsampling from Neon. 1287 void I444ToARGBRow_C(const uint8* src_y, 1288 const uint8* src_u, 1289 const uint8* src_v, 1290 uint8* rgb_buf, 1291 const struct YuvConstants* yuvconstants, 1292 int width) { 1293 int x; 1294 for (x = 0; x < width - 1; x += 2) { 1295 uint8 u = (src_u[0] + src_u[1] + 1) >> 1; 1296 uint8 v = (src_v[0] + src_v[1] + 1) >> 1; 1297 YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, 1298 yuvconstants); 1299 rgb_buf[3] = 255; 1300 YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, 1301 yuvconstants); 1302 rgb_buf[7] = 255; 1303 src_y += 2; 1304 src_u += 2; 1305 src_v += 2; 1306 rgb_buf += 8; // Advance 2 pixels. 1307 } 1308 if (width & 1) { 1309 YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, 1310 rgb_buf + 2, yuvconstants); 1311 rgb_buf[3] = 255; 1312 } 1313 } 1314 #else 1315 void I444ToARGBRow_C(const uint8* src_y, 1316 const uint8* src_u, 1317 const uint8* src_v, 1318 uint8* rgb_buf, 1319 const struct YuvConstants* yuvconstants, 1320 int width) { 1321 int x; 1322 for (x = 0; x < width; ++x) { 1323 YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, 1324 rgb_buf + 2, yuvconstants); 1325 rgb_buf[3] = 255; 1326 src_y += 1; 1327 src_u += 1; 1328 src_v += 1; 1329 rgb_buf += 4; // Advance 1 pixel. 
1330 } 1331 } 1332 #endif 1333 1334 // Also used for 420 1335 void I422ToARGBRow_C(const uint8* src_y, 1336 const uint8* src_u, 1337 const uint8* src_v, 1338 uint8* rgb_buf, 1339 const struct YuvConstants* yuvconstants, 1340 int width) { 1341 int x; 1342 for (x = 0; x < width - 1; x += 2) { 1343 YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, 1344 rgb_buf + 2, yuvconstants); 1345 rgb_buf[3] = 255; 1346 YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, 1347 rgb_buf + 6, yuvconstants); 1348 rgb_buf[7] = 255; 1349 src_y += 2; 1350 src_u += 1; 1351 src_v += 1; 1352 rgb_buf += 8; // Advance 2 pixels. 1353 } 1354 if (width & 1) { 1355 YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, 1356 rgb_buf + 2, yuvconstants); 1357 rgb_buf[3] = 255; 1358 } 1359 } 1360 1361 void I422AlphaToARGBRow_C(const uint8* src_y, 1362 const uint8* src_u, 1363 const uint8* src_v, 1364 const uint8* src_a, 1365 uint8* rgb_buf, 1366 const struct YuvConstants* yuvconstants, 1367 int width) { 1368 int x; 1369 for (x = 0; x < width - 1; x += 2) { 1370 YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, 1371 rgb_buf + 2, yuvconstants); 1372 rgb_buf[3] = src_a[0]; 1373 YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, 1374 rgb_buf + 6, yuvconstants); 1375 rgb_buf[7] = src_a[1]; 1376 src_y += 2; 1377 src_u += 1; 1378 src_v += 1; 1379 src_a += 2; 1380 rgb_buf += 8; // Advance 2 pixels. 
1381 } 1382 if (width & 1) { 1383 YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, 1384 rgb_buf + 2, yuvconstants); 1385 rgb_buf[3] = src_a[0]; 1386 } 1387 } 1388 1389 void I422ToRGB24Row_C(const uint8* src_y, 1390 const uint8* src_u, 1391 const uint8* src_v, 1392 uint8* rgb_buf, 1393 const struct YuvConstants* yuvconstants, 1394 int width) { 1395 int x; 1396 for (x = 0; x < width - 1; x += 2) { 1397 YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, 1398 rgb_buf + 2, yuvconstants); 1399 YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4, 1400 rgb_buf + 5, yuvconstants); 1401 src_y += 2; 1402 src_u += 1; 1403 src_v += 1; 1404 rgb_buf += 6; // Advance 2 pixels. 1405 } 1406 if (width & 1) { 1407 YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, 1408 rgb_buf + 2, yuvconstants); 1409 } 1410 } 1411 1412 void I422ToARGB4444Row_C(const uint8* src_y, 1413 const uint8* src_u, 1414 const uint8* src_v, 1415 uint8* dst_argb4444, 1416 const struct YuvConstants* yuvconstants, 1417 int width) { 1418 uint8 b0; 1419 uint8 g0; 1420 uint8 r0; 1421 uint8 b1; 1422 uint8 g1; 1423 uint8 r1; 1424 int x; 1425 for (x = 0; x < width - 1; x += 2) { 1426 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); 1427 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); 1428 b0 = b0 >> 4; 1429 g0 = g0 >> 4; 1430 r0 = r0 >> 4; 1431 b1 = b1 >> 4; 1432 g1 = g1 >> 4; 1433 r1 = r1 >> 4; 1434 *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | (b1 << 16) | 1435 (g1 << 20) | (r1 << 24) | 0xf000f000; 1436 src_y += 2; 1437 src_u += 1; 1438 src_v += 1; 1439 dst_argb4444 += 4; // Advance 2 pixels. 
1440 } 1441 if (width & 1) { 1442 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); 1443 b0 = b0 >> 4; 1444 g0 = g0 >> 4; 1445 r0 = r0 >> 4; 1446 *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000; 1447 } 1448 } 1449 1450 void I422ToARGB1555Row_C(const uint8* src_y, 1451 const uint8* src_u, 1452 const uint8* src_v, 1453 uint8* dst_argb1555, 1454 const struct YuvConstants* yuvconstants, 1455 int width) { 1456 uint8 b0; 1457 uint8 g0; 1458 uint8 r0; 1459 uint8 b1; 1460 uint8 g1; 1461 uint8 r1; 1462 int x; 1463 for (x = 0; x < width - 1; x += 2) { 1464 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); 1465 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); 1466 b0 = b0 >> 3; 1467 g0 = g0 >> 3; 1468 r0 = r0 >> 3; 1469 b1 = b1 >> 3; 1470 g1 = g1 >> 3; 1471 r1 = r1 >> 3; 1472 *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | (b1 << 16) | 1473 (g1 << 21) | (r1 << 26) | 0x80008000; 1474 src_y += 2; 1475 src_u += 1; 1476 src_v += 1; 1477 dst_argb1555 += 4; // Advance 2 pixels. 
1478 } 1479 if (width & 1) { 1480 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); 1481 b0 = b0 >> 3; 1482 g0 = g0 >> 3; 1483 r0 = r0 >> 3; 1484 *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000; 1485 } 1486 } 1487 1488 void I422ToRGB565Row_C(const uint8* src_y, 1489 const uint8* src_u, 1490 const uint8* src_v, 1491 uint8* dst_rgb565, 1492 const struct YuvConstants* yuvconstants, 1493 int width) { 1494 uint8 b0; 1495 uint8 g0; 1496 uint8 r0; 1497 uint8 b1; 1498 uint8 g1; 1499 uint8 r1; 1500 int x; 1501 for (x = 0; x < width - 1; x += 2) { 1502 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); 1503 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); 1504 b0 = b0 >> 3; 1505 g0 = g0 >> 2; 1506 r0 = r0 >> 3; 1507 b1 = b1 >> 3; 1508 g1 = g1 >> 2; 1509 r1 = r1 >> 3; 1510 *(uint32*)(dst_rgb565) = 1511 b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27); 1512 src_y += 2; 1513 src_u += 1; 1514 src_v += 1; 1515 dst_rgb565 += 4; // Advance 2 pixels. 1516 } 1517 if (width & 1) { 1518 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); 1519 b0 = b0 >> 3; 1520 g0 = g0 >> 2; 1521 r0 = r0 >> 3; 1522 *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); 1523 } 1524 } 1525 1526 void NV12ToARGBRow_C(const uint8* src_y, 1527 const uint8* src_uv, 1528 uint8* rgb_buf, 1529 const struct YuvConstants* yuvconstants, 1530 int width) { 1531 int x; 1532 for (x = 0; x < width - 1; x += 2) { 1533 YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1, 1534 rgb_buf + 2, yuvconstants); 1535 rgb_buf[3] = 255; 1536 YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5, 1537 rgb_buf + 6, yuvconstants); 1538 rgb_buf[7] = 255; 1539 src_y += 2; 1540 src_uv += 2; 1541 rgb_buf += 8; // Advance 2 pixels. 
1542 } 1543 if (width & 1) { 1544 YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1, 1545 rgb_buf + 2, yuvconstants); 1546 rgb_buf[3] = 255; 1547 } 1548 } 1549 1550 void NV21ToARGBRow_C(const uint8* src_y, 1551 const uint8* src_vu, 1552 uint8* rgb_buf, 1553 const struct YuvConstants* yuvconstants, 1554 int width) { 1555 int x; 1556 for (x = 0; x < width - 1; x += 2) { 1557 YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1, 1558 rgb_buf + 2, yuvconstants); 1559 rgb_buf[3] = 255; 1560 YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5, 1561 rgb_buf + 6, yuvconstants); 1562 rgb_buf[7] = 255; 1563 src_y += 2; 1564 src_vu += 2; 1565 rgb_buf += 8; // Advance 2 pixels. 1566 } 1567 if (width & 1) { 1568 YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1, 1569 rgb_buf + 2, yuvconstants); 1570 rgb_buf[3] = 255; 1571 } 1572 } 1573 1574 void NV12ToRGB565Row_C(const uint8* src_y, 1575 const uint8* src_uv, 1576 uint8* dst_rgb565, 1577 const struct YuvConstants* yuvconstants, 1578 int width) { 1579 uint8 b0; 1580 uint8 g0; 1581 uint8 r0; 1582 uint8 b1; 1583 uint8 g1; 1584 uint8 r1; 1585 int x; 1586 for (x = 0; x < width - 1; x += 2) { 1587 YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants); 1588 YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants); 1589 b0 = b0 >> 3; 1590 g0 = g0 >> 2; 1591 r0 = r0 >> 3; 1592 b1 = b1 >> 3; 1593 g1 = g1 >> 2; 1594 r1 = r1 >> 3; 1595 *(uint32*)(dst_rgb565) = 1596 b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27); 1597 src_y += 2; 1598 src_uv += 2; 1599 dst_rgb565 += 4; // Advance 2 pixels. 
1600 } 1601 if (width & 1) { 1602 YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants); 1603 b0 = b0 >> 3; 1604 g0 = g0 >> 2; 1605 r0 = r0 >> 3; 1606 *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); 1607 } 1608 } 1609 1610 void YUY2ToARGBRow_C(const uint8* src_yuy2, 1611 uint8* rgb_buf, 1612 const struct YuvConstants* yuvconstants, 1613 int width) { 1614 int x; 1615 for (x = 0; x < width - 1; x += 2) { 1616 YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1, 1617 rgb_buf + 2, yuvconstants); 1618 rgb_buf[3] = 255; 1619 YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5, 1620 rgb_buf + 6, yuvconstants); 1621 rgb_buf[7] = 255; 1622 src_yuy2 += 4; 1623 rgb_buf += 8; // Advance 2 pixels. 1624 } 1625 if (width & 1) { 1626 YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1, 1627 rgb_buf + 2, yuvconstants); 1628 rgb_buf[3] = 255; 1629 } 1630 } 1631 1632 void UYVYToARGBRow_C(const uint8* src_uyvy, 1633 uint8* rgb_buf, 1634 const struct YuvConstants* yuvconstants, 1635 int width) { 1636 int x; 1637 for (x = 0; x < width - 1; x += 2) { 1638 YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1, 1639 rgb_buf + 2, yuvconstants); 1640 rgb_buf[3] = 255; 1641 YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5, 1642 rgb_buf + 6, yuvconstants); 1643 rgb_buf[7] = 255; 1644 src_uyvy += 4; 1645 rgb_buf += 8; // Advance 2 pixels. 
1646 } 1647 if (width & 1) { 1648 YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1, 1649 rgb_buf + 2, yuvconstants); 1650 rgb_buf[3] = 255; 1651 } 1652 } 1653 1654 void I422ToRGBARow_C(const uint8* src_y, 1655 const uint8* src_u, 1656 const uint8* src_v, 1657 uint8* rgb_buf, 1658 const struct YuvConstants* yuvconstants, 1659 int width) { 1660 int x; 1661 for (x = 0; x < width - 1; x += 2) { 1662 YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2, 1663 rgb_buf + 3, yuvconstants); 1664 rgb_buf[0] = 255; 1665 YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6, 1666 rgb_buf + 7, yuvconstants); 1667 rgb_buf[4] = 255; 1668 src_y += 2; 1669 src_u += 1; 1670 src_v += 1; 1671 rgb_buf += 8; // Advance 2 pixels. 1672 } 1673 if (width & 1) { 1674 YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2, 1675 rgb_buf + 3, yuvconstants); 1676 rgb_buf[0] = 255; 1677 } 1678 } 1679 1680 void I400ToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) { 1681 int x; 1682 for (x = 0; x < width - 1; x += 2) { 1683 YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1684 rgb_buf[3] = 255; 1685 YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1686 rgb_buf[7] = 255; 1687 src_y += 2; 1688 rgb_buf += 8; // Advance 2 pixels. 
1689 } 1690 if (width & 1) { 1691 YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1692 rgb_buf[3] = 255; 1693 } 1694 } 1695 1696 void MirrorRow_C(const uint8* src, uint8* dst, int width) { 1697 int x; 1698 src += width - 1; 1699 for (x = 0; x < width - 1; x += 2) { 1700 dst[x] = src[0]; 1701 dst[x + 1] = src[-1]; 1702 src -= 2; 1703 } 1704 if (width & 1) { 1705 dst[width - 1] = src[0]; 1706 } 1707 } 1708 1709 void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { 1710 int x; 1711 src_uv += (width - 1) << 1; 1712 for (x = 0; x < width - 1; x += 2) { 1713 dst_u[x] = src_uv[0]; 1714 dst_u[x + 1] = src_uv[-2]; 1715 dst_v[x] = src_uv[1]; 1716 dst_v[x + 1] = src_uv[-2 + 1]; 1717 src_uv -= 4; 1718 } 1719 if (width & 1) { 1720 dst_u[width - 1] = src_uv[0]; 1721 dst_v[width - 1] = src_uv[1]; 1722 } 1723 } 1724 1725 void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) { 1726 int x; 1727 const uint32* src32 = (const uint32*)(src); 1728 uint32* dst32 = (uint32*)(dst); 1729 src32 += width - 1; 1730 for (x = 0; x < width - 1; x += 2) { 1731 dst32[x] = src32[0]; 1732 dst32[x + 1] = src32[-1]; 1733 src32 -= 2; 1734 } 1735 if (width & 1) { 1736 dst32[width - 1] = src32[0]; 1737 } 1738 } 1739 1740 void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { 1741 int x; 1742 for (x = 0; x < width - 1; x += 2) { 1743 dst_u[x] = src_uv[0]; 1744 dst_u[x + 1] = src_uv[2]; 1745 dst_v[x] = src_uv[1]; 1746 dst_v[x + 1] = src_uv[3]; 1747 src_uv += 4; 1748 } 1749 if (width & 1) { 1750 dst_u[width - 1] = src_uv[0]; 1751 dst_v[width - 1] = src_uv[1]; 1752 } 1753 } 1754 1755 void MergeUVRow_C(const uint8* src_u, 1756 const uint8* src_v, 1757 uint8* dst_uv, 1758 int width) { 1759 int x; 1760 for (x = 0; x < width - 1; x += 2) { 1761 dst_uv[0] = src_u[x]; 1762 dst_uv[1] = src_v[x]; 1763 dst_uv[2] = src_u[x + 1]; 1764 dst_uv[3] = src_v[x + 1]; 1765 dst_uv += 4; 1766 } 1767 if (width & 1) { 1768 dst_uv[0] = src_u[width - 1]; 1769 
dst_uv[1] = src_v[width - 1]; 1770 } 1771 } 1772 1773 void CopyRow_C(const uint8* src, uint8* dst, int count) { 1774 memcpy(dst, src, count); 1775 } 1776 1777 void CopyRow_16_C(const uint16* src, uint16* dst, int count) { 1778 memcpy(dst, src, count * 2); 1779 } 1780 1781 void SetRow_C(uint8* dst, uint8 v8, int width) { 1782 memset(dst, v8, width); 1783 } 1784 1785 void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int width) { 1786 uint32* d = (uint32*)(dst_argb); 1787 int x; 1788 for (x = 0; x < width; ++x) { 1789 d[x] = v32; 1790 } 1791 } 1792 1793 // Filter 2 rows of YUY2 UV's (422) into U and V (420). 1794 void YUY2ToUVRow_C(const uint8* src_yuy2, 1795 int src_stride_yuy2, 1796 uint8* dst_u, 1797 uint8* dst_v, 1798 int width) { 1799 // Output a row of UV values, filtering 2 rows of YUY2. 1800 int x; 1801 for (x = 0; x < width; x += 2) { 1802 dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1; 1803 dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1; 1804 src_yuy2 += 4; 1805 dst_u += 1; 1806 dst_v += 1; 1807 } 1808 } 1809 1810 // Copy row of YUY2 UV's (422) into U and V (422). 1811 void YUY2ToUV422Row_C(const uint8* src_yuy2, 1812 uint8* dst_u, 1813 uint8* dst_v, 1814 int width) { 1815 // Output a row of UV values. 1816 int x; 1817 for (x = 0; x < width; x += 2) { 1818 dst_u[0] = src_yuy2[1]; 1819 dst_v[0] = src_yuy2[3]; 1820 src_yuy2 += 4; 1821 dst_u += 1; 1822 dst_v += 1; 1823 } 1824 } 1825 1826 // Copy row of YUY2 Y's (422) into Y (420/422). 1827 void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) { 1828 // Output a row of Y values. 1829 int x; 1830 for (x = 0; x < width - 1; x += 2) { 1831 dst_y[x] = src_yuy2[0]; 1832 dst_y[x + 1] = src_yuy2[2]; 1833 src_yuy2 += 4; 1834 } 1835 if (width & 1) { 1836 dst_y[width - 1] = src_yuy2[0]; 1837 } 1838 } 1839 1840 // Filter 2 rows of UYVY UV's (422) into U and V (420). 
1841 void UYVYToUVRow_C(const uint8* src_uyvy, 1842 int src_stride_uyvy, 1843 uint8* dst_u, 1844 uint8* dst_v, 1845 int width) { 1846 // Output a row of UV values. 1847 int x; 1848 for (x = 0; x < width; x += 2) { 1849 dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1; 1850 dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1; 1851 src_uyvy += 4; 1852 dst_u += 1; 1853 dst_v += 1; 1854 } 1855 } 1856 1857 // Copy row of UYVY UV's (422) into U and V (422). 1858 void UYVYToUV422Row_C(const uint8* src_uyvy, 1859 uint8* dst_u, 1860 uint8* dst_v, 1861 int width) { 1862 // Output a row of UV values. 1863 int x; 1864 for (x = 0; x < width; x += 2) { 1865 dst_u[0] = src_uyvy[0]; 1866 dst_v[0] = src_uyvy[2]; 1867 src_uyvy += 4; 1868 dst_u += 1; 1869 dst_v += 1; 1870 } 1871 } 1872 1873 // Copy row of UYVY Y's (422) into Y (420/422). 1874 void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) { 1875 // Output a row of Y values. 1876 int x; 1877 for (x = 0; x < width - 1; x += 2) { 1878 dst_y[x] = src_uyvy[1]; 1879 dst_y[x + 1] = src_uyvy[3]; 1880 src_uyvy += 4; 1881 } 1882 if (width & 1) { 1883 dst_y[width - 1] = src_uyvy[1]; 1884 } 1885 } 1886 1887 #define BLEND(f, b, a) (((256 - a) * b) >> 8) + f 1888 1889 // Blend src_argb0 over src_argb1 and store to dst_argb. 1890 // dst_argb may be src_argb0 or src_argb1. 1891 // This code mimics the SSSE3 version for better testability. 
1892 void ARGBBlendRow_C(const uint8* src_argb0, 1893 const uint8* src_argb1, 1894 uint8* dst_argb, 1895 int width) { 1896 int x; 1897 for (x = 0; x < width - 1; x += 2) { 1898 uint32 fb = src_argb0[0]; 1899 uint32 fg = src_argb0[1]; 1900 uint32 fr = src_argb0[2]; 1901 uint32 a = src_argb0[3]; 1902 uint32 bb = src_argb1[0]; 1903 uint32 bg = src_argb1[1]; 1904 uint32 br = src_argb1[2]; 1905 dst_argb[0] = BLEND(fb, bb, a); 1906 dst_argb[1] = BLEND(fg, bg, a); 1907 dst_argb[2] = BLEND(fr, br, a); 1908 dst_argb[3] = 255u; 1909 1910 fb = src_argb0[4 + 0]; 1911 fg = src_argb0[4 + 1]; 1912 fr = src_argb0[4 + 2]; 1913 a = src_argb0[4 + 3]; 1914 bb = src_argb1[4 + 0]; 1915 bg = src_argb1[4 + 1]; 1916 br = src_argb1[4 + 2]; 1917 dst_argb[4 + 0] = BLEND(fb, bb, a); 1918 dst_argb[4 + 1] = BLEND(fg, bg, a); 1919 dst_argb[4 + 2] = BLEND(fr, br, a); 1920 dst_argb[4 + 3] = 255u; 1921 src_argb0 += 8; 1922 src_argb1 += 8; 1923 dst_argb += 8; 1924 } 1925 1926 if (width & 1) { 1927 uint32 fb = src_argb0[0]; 1928 uint32 fg = src_argb0[1]; 1929 uint32 fr = src_argb0[2]; 1930 uint32 a = src_argb0[3]; 1931 uint32 bb = src_argb1[0]; 1932 uint32 bg = src_argb1[1]; 1933 uint32 br = src_argb1[2]; 1934 dst_argb[0] = BLEND(fb, bb, a); 1935 dst_argb[1] = BLEND(fg, bg, a); 1936 dst_argb[2] = BLEND(fr, br, a); 1937 dst_argb[3] = 255u; 1938 } 1939 } 1940 #undef BLEND 1941 1942 #define UBLEND(f, b, a) (((a)*f) + ((255 - a) * b) + 255) >> 8 1943 void BlendPlaneRow_C(const uint8* src0, 1944 const uint8* src1, 1945 const uint8* alpha, 1946 uint8* dst, 1947 int width) { 1948 int x; 1949 for (x = 0; x < width - 1; x += 2) { 1950 dst[0] = UBLEND(src0[0], src1[0], alpha[0]); 1951 dst[1] = UBLEND(src0[1], src1[1], alpha[1]); 1952 src0 += 2; 1953 src1 += 2; 1954 alpha += 2; 1955 dst += 2; 1956 } 1957 if (width & 1) { 1958 dst[0] = UBLEND(src0[0], src1[0], alpha[0]); 1959 } 1960 } 1961 #undef UBLEND 1962 1963 #define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24 1964 1965 // Multiply source RGB by 
alpha and store to destination. 1966 // This code mimics the SSSE3 version for better testability. 1967 void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { 1968 int i; 1969 for (i = 0; i < width - 1; i += 2) { 1970 uint32 b = src_argb[0]; 1971 uint32 g = src_argb[1]; 1972 uint32 r = src_argb[2]; 1973 uint32 a = src_argb[3]; 1974 dst_argb[0] = ATTENUATE(b, a); 1975 dst_argb[1] = ATTENUATE(g, a); 1976 dst_argb[2] = ATTENUATE(r, a); 1977 dst_argb[3] = a; 1978 b = src_argb[4]; 1979 g = src_argb[5]; 1980 r = src_argb[6]; 1981 a = src_argb[7]; 1982 dst_argb[4] = ATTENUATE(b, a); 1983 dst_argb[5] = ATTENUATE(g, a); 1984 dst_argb[6] = ATTENUATE(r, a); 1985 dst_argb[7] = a; 1986 src_argb += 8; 1987 dst_argb += 8; 1988 } 1989 1990 if (width & 1) { 1991 const uint32 b = src_argb[0]; 1992 const uint32 g = src_argb[1]; 1993 const uint32 r = src_argb[2]; 1994 const uint32 a = src_argb[3]; 1995 dst_argb[0] = ATTENUATE(b, a); 1996 dst_argb[1] = ATTENUATE(g, a); 1997 dst_argb[2] = ATTENUATE(r, a); 1998 dst_argb[3] = a; 1999 } 2000 } 2001 #undef ATTENUATE 2002 2003 // Divide source RGB by alpha and store to destination. 2004 // b = (b * 255 + (a / 2)) / a; 2005 // g = (g * 255 + (a / 2)) / a; 2006 // r = (r * 255 + (a / 2)) / a; 2007 // Reciprocal method is off by 1 on some values. ie 125 2008 // 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower. 
// T(a): 1.0 (0x0100) in the upper 16 bits and 0x10000 / a in the lower 16.
#define T(a) 0x01000000 + (0x10000 / a)
// Indexed by alpha. Three entries are written out by hand instead of via T():
//   [0]   has a zero reciprocal (T(0) would divide by zero),
//   [1]   uses 0xffff because 0x10000 does not fit in the lower 16 bits,
//   [255] uses 0x0100 (exactly 1.0) where T(0xff) would give 0x0101.
const uint32 fixed_invtbl8[256] = {
    0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06),
    T(0x07), T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d),
    T(0x0e), T(0x0f), T(0x10), T(0x11), T(0x12), T(0x13), T(0x14),
    T(0x15), T(0x16), T(0x17), T(0x18), T(0x19), T(0x1a), T(0x1b),
    T(0x1c), T(0x1d), T(0x1e), T(0x1f), T(0x20), T(0x21), T(0x22),
    T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), T(0x28), T(0x29),
    T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), T(0x30),
    T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
    T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e),
    T(0x3f), T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45),
    T(0x46), T(0x47), T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c),
    T(0x4d), T(0x4e), T(0x4f), T(0x50), T(0x51), T(0x52), T(0x53),
    T(0x54), T(0x55), T(0x56), T(0x57), T(0x58), T(0x59), T(0x5a),
    T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), T(0x60), T(0x61),
    T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), T(0x68),
    T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
    T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76),
    T(0x77), T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d),
    T(0x7e), T(0x7f), T(0x80), T(0x81), T(0x82), T(0x83), T(0x84),
    T(0x85), T(0x86), T(0x87), T(0x88), T(0x89), T(0x8a), T(0x8b),
    T(0x8c), T(0x8d), T(0x8e), T(0x8f), T(0x90), T(0x91), T(0x92),
    T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), T(0x98), T(0x99),
    T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), T(0xa0),
    T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
    T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae),
    T(0xaf), T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5),
    T(0xb6), T(0xb7), T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc),
    T(0xbd), T(0xbe), T(0xbf), T(0xc0), T(0xc1), T(0xc2), T(0xc3),
    T(0xc4), T(0xc5), T(0xc6), T(0xc7), T(0xc8), T(0xc9), T(0xca),
    T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), T(0xd0), T(0xd1),
    T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), T(0xd8),
    T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
    T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6),
    T(0xe7), T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed),
    T(0xee), T(0xef), T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4),
    T(0xf5), T(0xf6), T(0xf7), T(0xf8), T(0xf9), T(0xfa), T(0xfb),
    T(0xfc), T(0xfd), T(0xfe), 0x01000100};
#undef T

// Multiplies B, G and R of each pixel by the 8.8 fixed point reciprocal of
// the pixel's alpha taken from fixed_invtbl8; alpha is copied unchanged.
// With alpha 0 the reciprocal is 0, so B, G and R become 0.
void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint32 b = src_argb[0];
    uint32 g = src_argb[1];
    uint32 r = src_argb[2];
    const uint32 a = src_argb[3];
    // Only the lower 16 bits (the reciprocal) of the table entry are used.
    const uint32 ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
    b = (b * ia) >> 8;
    g = (g * ia) >> 8;
    r = (r * ia) >> 8;
    // Clamping should not be necessary but is free in assembly.
    dst_argb[0] = clamp255(b);
    dst_argb[1] = clamp255(g);
    dst_argb[2] = clamp255(r);
    dst_argb[3] = a;
    src_argb += 4;
    dst_argb += 4;
  }
}

// For each of |width| 4-channel pixels, adds the running per-channel sum of
// |row| up to and including that pixel to the matching entry of
// |previous_cumsum| and stores the result in |cumsum|.
void ComputeCumulativeSumRow_C(const uint8* row,
                               int32* cumsum,
                               const int32* previous_cumsum,
                               int width) {
  int32 row_sum[4] = {0, 0, 0, 0};
  int x;
  for (x = 0; x < width; ++x) {
    row_sum[0] += row[x * 4 + 0];
    row_sum[1] += row[x * 4 + 1];
    row_sum[2] += row[x * 4 + 2];
    row_sum[3] += row[x * 4 + 3];
    cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0];
    cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1];
    cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2];
    cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3];
  }
}

// Produces |count| 4-channel pixels. Each channel is
// (bl[w + c] + tl[c] - bl[c] - tl[w + c]) scaled by 1 / area and truncated to
// uint8; |w| is the offset, in int32 elements, between the two columns read.
// NOTE(review): area is used as a divisor without a zero check - presumably
// callers guarantee area != 0; confirm.
void CumulativeSumToAverageRow_C(const int32* tl,
                                 const int32* bl,
                                 int w,
                                 int area,
                                 uint8* dst,
                                 int count) {
  float ooa = 1.0f / area;  // "One over area", hoisted out of the loop.
  int i;
  for (i = 0; i < count; ++i) {
    dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
    dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
    dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
    dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
    dst += 4;
    tl += 4;
    bl += 4;
  }
}

// Copy pixels from rotated source to destination row with a slope.
// uv_dudv[0], uv_dudv[1] are the starting source x, y coordinates and
// uv_dudv[2], uv_dudv[3] the per-pixel increments. Coordinates are truncated
// to int and are not range checked here.
LIBYUV_API
void ARGBAffineRow_C(const uint8* src_argb,
                     int src_argb_stride,
                     uint8* dst_argb,
                     const float* uv_dudv,
                     int width) {
  int i;
  // Render a row of pixels from source into a buffer.
  float uv[2];
  uv[0] = uv_dudv[0];
  uv[1] = uv_dudv[1];
  for (i = 0; i < width; ++i) {
    int x = (int)(uv[0]);
    int y = (int)(uv[1]);
    // Copy one whole 4-byte pixel.
    *(uint32*)(dst_argb) =
        *(const uint32*)(src_argb + y * src_argb_stride + x * 4);
    dst_argb += 4;
    uv[0] += uv_dudv[2];
    uv[1] += uv_dudv[3];
  }
}

// Blend 2 rows into 1.
// Each output is the rounded average of src_uv[x] and the sample
// src_uv_stride elements after it.
static void HalfRow_C(const uint8* src_uv,
                      ptrdiff_t src_uv_stride,
                      uint8* dst_uv,
                      int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
  }
}

// 16-bit sample version of HalfRow_C; src_uv_stride is in uint16 elements.
static void HalfRow_16_C(const uint16* src_uv,
                         ptrdiff_t src_uv_stride,
                         uint16* dst_uv,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
  }
}

// C version 2x2 -> 2x1.
// Blends the row at src_ptr with the row src_stride bytes after it.
// source_y_fraction is the weight of the second row out of 256: 0 copies the
// first row, 128 takes the rounded average, anything else is a rounded
// weighted sum.
void InterpolateRow_C(uint8* dst_ptr,
                      const uint8* src_ptr,
                      ptrdiff_t src_stride,
                      int width,
                      int source_y_fraction) {
  int y1_fraction = source_y_fraction;
  int y0_fraction = 256 - y1_fraction;
  const uint8* src_ptr1 = src_ptr + src_stride;
  int x;
  if (y1_fraction == 0) {
    memcpy(dst_ptr, src_ptr, width);
    return;
  }
  if (y1_fraction == 128) {
    HalfRow_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (x = 0; x < width - 1; x += 2) {
    dst_ptr[0] =
        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
    dst_ptr[1] =
        (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction + 128) >> 8;
    src_ptr += 2;
    src_ptr1 += 2;
    dst_ptr += 2;
  }
  if (width & 1) {
    dst_ptr[0] =
        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
  }
}

void InterpolateRow_16_C(uint16* dst_ptr,
                         const uint16* src_ptr,
                         ptrdiff_t src_stride,
                         int width,
                         int source_y_fraction) {
  int y1_fraction = source_y_fraction;
  int
y0_fraction = 256 - y1_fraction; 2192 const uint16* src_ptr1 = src_ptr + src_stride; 2193 int x; 2194 if (source_y_fraction == 0) { 2195 memcpy(dst_ptr, src_ptr, width * 2); 2196 return; 2197 } 2198 if (source_y_fraction == 128) { 2199 HalfRow_16_C(src_ptr, src_stride, dst_ptr, width); 2200 return; 2201 } 2202 for (x = 0; x < width - 1; x += 2) { 2203 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; 2204 dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8; 2205 src_ptr += 2; 2206 src_ptr1 += 2; 2207 dst_ptr += 2; 2208 } 2209 if (width & 1) { 2210 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; 2211 } 2212 } 2213 2214 // Use first 4 shuffler values to reorder ARGB channels. 2215 void ARGBShuffleRow_C(const uint8* src_argb, 2216 uint8* dst_argb, 2217 const uint8* shuffler, 2218 int width) { 2219 int index0 = shuffler[0]; 2220 int index1 = shuffler[1]; 2221 int index2 = shuffler[2]; 2222 int index3 = shuffler[3]; 2223 // Shuffle a row of ARGB. 2224 int x; 2225 for (x = 0; x < width; ++x) { 2226 // To support in-place conversion. 
2227 uint8 b = src_argb[index0]; 2228 uint8 g = src_argb[index1]; 2229 uint8 r = src_argb[index2]; 2230 uint8 a = src_argb[index3]; 2231 dst_argb[0] = b; 2232 dst_argb[1] = g; 2233 dst_argb[2] = r; 2234 dst_argb[3] = a; 2235 src_argb += 4; 2236 dst_argb += 4; 2237 } 2238 } 2239 2240 void I422ToYUY2Row_C(const uint8* src_y, 2241 const uint8* src_u, 2242 const uint8* src_v, 2243 uint8* dst_frame, 2244 int width) { 2245 int x; 2246 for (x = 0; x < width - 1; x += 2) { 2247 dst_frame[0] = src_y[0]; 2248 dst_frame[1] = src_u[0]; 2249 dst_frame[2] = src_y[1]; 2250 dst_frame[3] = src_v[0]; 2251 dst_frame += 4; 2252 src_y += 2; 2253 src_u += 1; 2254 src_v += 1; 2255 } 2256 if (width & 1) { 2257 dst_frame[0] = src_y[0]; 2258 dst_frame[1] = src_u[0]; 2259 dst_frame[2] = 0; 2260 dst_frame[3] = src_v[0]; 2261 } 2262 } 2263 2264 void I422ToUYVYRow_C(const uint8* src_y, 2265 const uint8* src_u, 2266 const uint8* src_v, 2267 uint8* dst_frame, 2268 int width) { 2269 int x; 2270 for (x = 0; x < width - 1; x += 2) { 2271 dst_frame[0] = src_u[0]; 2272 dst_frame[1] = src_y[0]; 2273 dst_frame[2] = src_v[0]; 2274 dst_frame[3] = src_y[1]; 2275 dst_frame += 4; 2276 src_y += 2; 2277 src_u += 1; 2278 src_v += 1; 2279 } 2280 if (width & 1) { 2281 dst_frame[0] = src_u[0]; 2282 dst_frame[1] = src_y[0]; 2283 dst_frame[2] = src_v[0]; 2284 dst_frame[3] = 0; 2285 } 2286 } 2287 2288 void ARGBPolynomialRow_C(const uint8* src_argb, 2289 uint8* dst_argb, 2290 const float* poly, 2291 int width) { 2292 int i; 2293 for (i = 0; i < width; ++i) { 2294 float b = (float)(src_argb[0]); 2295 float g = (float)(src_argb[1]); 2296 float r = (float)(src_argb[2]); 2297 float a = (float)(src_argb[3]); 2298 float b2 = b * b; 2299 float g2 = g * g; 2300 float r2 = r * r; 2301 float a2 = a * a; 2302 float db = poly[0] + poly[4] * b; 2303 float dg = poly[1] + poly[5] * g; 2304 float dr = poly[2] + poly[6] * r; 2305 float da = poly[3] + poly[7] * a; 2306 float b3 = b2 * b; 2307 float g3 = g2 * g; 2308 float r3 = r2 * r; 
2309 float a3 = a2 * a; 2310 db += poly[8] * b2; 2311 dg += poly[9] * g2; 2312 dr += poly[10] * r2; 2313 da += poly[11] * a2; 2314 db += poly[12] * b3; 2315 dg += poly[13] * g3; 2316 dr += poly[14] * r3; 2317 da += poly[15] * a3; 2318 2319 dst_argb[0] = Clamp((int32)(db)); 2320 dst_argb[1] = Clamp((int32)(dg)); 2321 dst_argb[2] = Clamp((int32)(dr)); 2322 dst_argb[3] = Clamp((int32)(da)); 2323 src_argb += 4; 2324 dst_argb += 4; 2325 } 2326 } 2327 2328 // Samples assumed to be unsigned in low 9, 10 or 12 bits. Scale factor 2329 // adjust the source integer range to the half float range desired. 2330 2331 // This magic constant is 2^-112. Multiplying by this 2332 // is the same as subtracting 112 from the exponent, which 2333 // is the difference in exponent bias between 32-bit and 2334 // 16-bit floats. Once we've done this subtraction, we can 2335 // simply extract the low bits of the exponent and the high 2336 // bits of the mantissa from our float and we're done. 2337 2338 void HalfFloatRow_C(const uint16* src, uint16* dst, float scale, int width) { 2339 int i; 2340 float mult = 1.9259299444e-34f * scale; 2341 for (i = 0; i < width; ++i) { 2342 float value = src[i] * mult; 2343 dst[i] = (uint16)((*(uint32_t*)&value) >> 13); 2344 } 2345 } 2346 2347 void ARGBLumaColorTableRow_C(const uint8* src_argb, 2348 uint8* dst_argb, 2349 int width, 2350 const uint8* luma, 2351 uint32 lumacoeff) { 2352 uint32 bc = lumacoeff & 0xff; 2353 uint32 gc = (lumacoeff >> 8) & 0xff; 2354 uint32 rc = (lumacoeff >> 16) & 0xff; 2355 2356 int i; 2357 for (i = 0; i < width - 1; i += 2) { 2358 // Luminance in rows, color values in columns. 
2359 const uint8* luma0 = 2360 ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) + 2361 luma; 2362 const uint8* luma1; 2363 dst_argb[0] = luma0[src_argb[0]]; 2364 dst_argb[1] = luma0[src_argb[1]]; 2365 dst_argb[2] = luma0[src_argb[2]]; 2366 dst_argb[3] = src_argb[3]; 2367 luma1 = 2368 ((src_argb[4] * bc + src_argb[5] * gc + src_argb[6] * rc) & 0x7F00u) + 2369 luma; 2370 dst_argb[4] = luma1[src_argb[4]]; 2371 dst_argb[5] = luma1[src_argb[5]]; 2372 dst_argb[6] = luma1[src_argb[6]]; 2373 dst_argb[7] = src_argb[7]; 2374 src_argb += 8; 2375 dst_argb += 8; 2376 } 2377 if (width & 1) { 2378 // Luminance in rows, color values in columns. 2379 const uint8* luma0 = 2380 ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) + 2381 luma; 2382 dst_argb[0] = luma0[src_argb[0]]; 2383 dst_argb[1] = luma0[src_argb[1]]; 2384 dst_argb[2] = luma0[src_argb[2]]; 2385 dst_argb[3] = src_argb[3]; 2386 } 2387 } 2388 2389 void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) { 2390 int i; 2391 for (i = 0; i < width - 1; i += 2) { 2392 dst[3] = src[3]; 2393 dst[7] = src[7]; 2394 dst += 8; 2395 src += 8; 2396 } 2397 if (width & 1) { 2398 dst[3] = src[3]; 2399 } 2400 } 2401 2402 void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width) { 2403 int i; 2404 for (i = 0; i < width - 1; i += 2) { 2405 dst_a[0] = src_argb[3]; 2406 dst_a[1] = src_argb[7]; 2407 dst_a += 2; 2408 src_argb += 8; 2409 } 2410 if (width & 1) { 2411 dst_a[0] = src_argb[3]; 2412 } 2413 } 2414 2415 void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) { 2416 int i; 2417 for (i = 0; i < width - 1; i += 2) { 2418 dst[3] = src[0]; 2419 dst[7] = src[1]; 2420 dst += 8; 2421 src += 2; 2422 } 2423 if (width & 1) { 2424 dst[3] = src[0]; 2425 } 2426 } 2427 2428 // Maximum temporary width for wrappers to process at a time, in pixels. 
#define MAXTWIDTH 2048

#if !(defined(_MSC_VER) && defined(_M_IX86)) && \
    defined(HAS_I422TORGB565ROW_SSSE3)
// row_win.cc has asm version, but GCC uses 2 step wrapper.
// I422 -> RGB565 in two steps: convert up to MAXTWIDTH pixels to ARGB in a
// temporary row, then pack that row to RGB565.
void I422ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_u,
                           const uint8* src_v,
                           uint8* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Scratch row holding the intermediate ARGB pixels of one chunk.
  SIMD_ALIGNED(uint8 argb_row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = MAXTWIDTH;
    if (width <= MAXTWIDTH) {
      chunk = width;
    }
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, argb_row, yuvconstants, chunk);
    ARGBToRGB565Row_SSE2(argb_row, dst_rgb565, chunk);
    // Y advances one byte per pixel, U and V one byte per two pixels, and
    // the destination two bytes per pixel.
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
    dst_rgb565 += chunk * 2;
  }
}
#endif

#if defined(HAS_I422TOARGB1555ROW_SSSE3)
// I422 -> ARGB1555 through a temporary ARGB row, MAXTWIDTH pixels at a time.
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
                             uint8* dst_argb1555,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Scratch row holding the intermediate ARGB pixels of one chunk.
  SIMD_ALIGNED(uint8 argb_row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = MAXTWIDTH;
    if (width <= MAXTWIDTH) {
      chunk = width;
    }
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, argb_row, yuvconstants, chunk);
    ARGBToARGB1555Row_SSE2(argb_row, dst_argb1555, chunk);
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
    dst_argb1555 += chunk * 2;
  }
}
#endif

#if defined(HAS_I422TOARGB4444ROW_SSSE3)
// I422 -> ARGB4444 through a temporary ARGB row, MAXTWIDTH pixels at a time.
void I422ToARGB4444Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
                             uint8* dst_argb4444,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Scratch row holding the intermediate ARGB pixels of one chunk.
  SIMD_ALIGNED(uint8 argb_row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = MAXTWIDTH;
    if (width <= MAXTWIDTH) {
      chunk = width;
    }
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, argb_row, yuvconstants, chunk);
    ARGBToARGB4444Row_SSE2(argb_row, dst_argb4444, chunk);
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
    dst_argb4444 += chunk * 2;
  }
}
#endif

#if defined(HAS_NV12TORGB565ROW_SSSE3)
// NV12 -> RGB565 through a temporary ARGB row, MAXTWIDTH pixels at a time.
void NV12ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_uv,
                           uint8* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Scratch row holding the intermediate ARGB pixels of one chunk.
  SIMD_ALIGNED(uint8 argb_row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = MAXTWIDTH;
    if (width <= MAXTWIDTH) {
      chunk = width;
    }
    NV12ToARGBRow_SSSE3(src_y, src_uv, argb_row, yuvconstants, chunk);
    ARGBToRGB565Row_SSE2(argb_row, dst_rgb565, chunk);
    // The interleaved UV row advances by the same byte count as the Y row.
    src_y += chunk;
    src_uv += chunk;
    dst_rgb565 += chunk * 2;
  }
}
#endif

#if defined(HAS_I422TORGB565ROW_AVX2)
// I422 -> RGB565 through a temporary ARGB row, MAXTWIDTH pixels at a time.
// Packing uses the AVX2 kernel when it is available, otherwise SSE2.
void I422ToRGB565Row_AVX2(const uint8* src_y,
                          const uint8* src_u,
                          const uint8* src_v,
                          uint8* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Scratch row holding the intermediate ARGB pixels of one chunk.
  SIMD_ALIGNED(uint8 argb_row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = MAXTWIDTH;
    if (width <= MAXTWIDTH) {
      chunk = width;
    }
    I422ToARGBRow_AVX2(src_y, src_u, src_v, argb_row, yuvconstants, chunk);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(argb_row, dst_rgb565, chunk);
#else
    ARGBToRGB565Row_SSE2(argb_row, dst_rgb565, chunk);
#endif
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
    dst_rgb565 += chunk * 2;
  }
}
#endif

#if defined(HAS_I422TOARGB1555ROW_AVX2)
// I422 -> ARGB1555 through a temporary ARGB row, MAXTWIDTH pixels at a time.
// Packing uses the AVX2 kernel when it is available, otherwise SSE2.
void I422ToARGB1555Row_AVX2(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb1555,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Scratch row holding the intermediate ARGB pixels of one chunk.
  SIMD_ALIGNED(uint8 argb_row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = MAXTWIDTH;
    if (width <= MAXTWIDTH) {
      chunk = width;
    }
    I422ToARGBRow_AVX2(src_y, src_u, src_v, argb_row, yuvconstants, chunk);
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
    ARGBToARGB1555Row_AVX2(argb_row, dst_argb1555, chunk);
#else
    ARGBToARGB1555Row_SSE2(argb_row, dst_argb1555, chunk);
#endif
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
    dst_argb1555 += chunk * 2;
  }
}
#endif

#if defined(HAS_I422TOARGB4444ROW_AVX2)
// I422 -> ARGB4444 through a temporary ARGB row, MAXTWIDTH pixels at a time.
// Packing uses the AVX2 kernel when it is available, otherwise SSE2.
void I422ToARGB4444Row_AVX2(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb4444,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Scratch row holding the intermediate ARGB pixels of one chunk.
  SIMD_ALIGNED(uint8 argb_row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = MAXTWIDTH;
    if (width <= MAXTWIDTH) {
      chunk = width;
    }
    I422ToARGBRow_AVX2(src_y, src_u, src_v, argb_row, yuvconstants, chunk);
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
    ARGBToARGB4444Row_AVX2(argb_row, dst_argb4444, chunk);
#else
    ARGBToARGB4444Row_SSE2(argb_row, dst_argb4444, chunk);
#endif
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
    dst_argb4444 += chunk * 2;
  }
}
#endif

#if defined(HAS_I422TORGB24ROW_AVX2)
// I422 -> RGB24 through a temporary ARGB row, MAXTWIDTH pixels at a time.
void I422ToRGB24Row_AVX2(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Scratch row holding the intermediate ARGB pixels of one chunk.
  SIMD_ALIGNED(uint8 argb_row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = MAXTWIDTH;
    if (width <= MAXTWIDTH) {
      chunk = width;
    }
    I422ToARGBRow_AVX2(src_y, src_u, src_v, argb_row, yuvconstants, chunk);
    // TODO(fbarchard): ARGBToRGB24Row_AVX2
    ARGBToRGB24Row_SSSE3(argb_row, dst_rgb24, chunk);
    // RGB24 output is three bytes per pixel.
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
    dst_rgb24 += chunk * 3;
  }
}
#endif

#if defined(HAS_NV12TORGB565ROW_AVX2)
// NV12 -> RGB565 through a temporary ARGB row, MAXTWIDTH pixels at a time.
// Packing uses the AVX2 kernel when it is available, otherwise SSE2.
void NV12ToRGB565Row_AVX2(const uint8* src_y,
                          const uint8* src_uv,
                          uint8* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Scratch row holding the intermediate ARGB pixels of one chunk.
  SIMD_ALIGNED(uint8 argb_row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = MAXTWIDTH;
    if (width <= MAXTWIDTH) {
      chunk = width;
    }
    NV12ToARGBRow_AVX2(src_y, src_uv, argb_row, yuvconstants, chunk);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(argb_row, dst_rgb565, chunk);
#else
    ARGBToRGB565Row_SSE2(argb_row, dst_rgb565, chunk);
#endif
    src_y += chunk;
    src_uv += chunk;
    dst_rgb565 += chunk * 2;
  }
}
#endif

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif