/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/planar_functions.h"

#include <string.h>  // for memset()

#include "libyuv/cpu_id.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#include "libyuv/row.h"
#include "libyuv/scale_row.h"  // for ScaleRowDown2

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Copy a plane of data.
// Copies 'height' rows of 'width' bytes from src_y to dst_y, selecting the
// fastest CopyRow variant the CPU supports. A negative height writes the
// destination bottom-up (vertical flip).
LIBYUV_API
void CopyPlane(const uint8* src_y,
               int src_stride_y,
               uint8* dst_y,
               int dst_stride_y,
               int width,
               int height) {
  int y;
  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows: when both planes are contiguous, copy as one long row.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
  // Nothing to do when source and destination alias exactly.
  if (src_y == dst_y && src_stride_y == dst_stride_y) {
    return;
  }
#if defined(HAS_COPYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    // Aligned widths can use the exact-width kernel; others need the
    // Any_ variant which handles the remainder.
    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
  }
#endif
#if defined(HAS_COPYROW_AVX)
  if (TestCpuFlag(kCpuHasAVX)) {
    CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
  }
#endif
#if defined(HAS_COPYROW_ERMS)
  // Enhanced REP MOVSB takes precedence over SSE2/AVX when reported.
  if (TestCpuFlag(kCpuHasERMS)) {
    CopyRow = CopyRow_ERMS;
  }
#endif
#if defined(HAS_COPYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
  }
#endif
#if defined(HAS_COPYROW_MIPS)
  if (TestCpuFlag(kCpuHasMIPS)) {
    CopyRow = CopyRow_MIPS;
  }
#endif

  // Copy plane
  for (y = 0; y < height; ++y) {
    CopyRow(src_y, dst_y, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}

// Copy a plane of 16-bit data. Width is in elements (uint16), not bytes.
// TODO(fbarchard): Consider support for negative height.
// TODO(fbarchard): Consider stride measured in bytes.
LIBYUV_API
void CopyPlane_16(const uint16* src_y,
                  int src_stride_y,
                  uint16* dst_y,
                  int dst_stride_y,
                  int width,
                  int height) {
  int y;
  void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C;
  // Coalesce rows.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
#if defined(HAS_COPYROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) {
    CopyRow = CopyRow_16_SSE2;
  }
#endif
#if defined(HAS_COPYROW_16_ERMS)
  if (TestCpuFlag(kCpuHasERMS)) {
    CopyRow = CopyRow_16_ERMS;
  }
#endif
#if defined(HAS_COPYROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
    CopyRow = CopyRow_16_NEON;
  }
#endif
#if defined(HAS_COPYROW_16_MIPS)
  if (TestCpuFlag(kCpuHasMIPS)) {
    CopyRow = CopyRow_16_MIPS;
  }
#endif

  // Copy plane
  for (y = 0; y < height; ++y) {
    CopyRow(src_y, dst_y, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}

// Copy I422.
134 LIBYUV_API 135 int I422Copy(const uint8* src_y, 136 int src_stride_y, 137 const uint8* src_u, 138 int src_stride_u, 139 const uint8* src_v, 140 int src_stride_v, 141 uint8* dst_y, 142 int dst_stride_y, 143 uint8* dst_u, 144 int dst_stride_u, 145 uint8* dst_v, 146 int dst_stride_v, 147 int width, 148 int height) { 149 int halfwidth = (width + 1) >> 1; 150 if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { 151 return -1; 152 } 153 // Negative height means invert the image. 154 if (height < 0) { 155 height = -height; 156 src_y = src_y + (height - 1) * src_stride_y; 157 src_u = src_u + (height - 1) * src_stride_u; 158 src_v = src_v + (height - 1) * src_stride_v; 159 src_stride_y = -src_stride_y; 160 src_stride_u = -src_stride_u; 161 src_stride_v = -src_stride_v; 162 } 163 164 if (dst_y) { 165 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); 166 } 167 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height); 168 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height); 169 return 0; 170 } 171 172 // Copy I444. 173 LIBYUV_API 174 int I444Copy(const uint8* src_y, 175 int src_stride_y, 176 const uint8* src_u, 177 int src_stride_u, 178 const uint8* src_v, 179 int src_stride_v, 180 uint8* dst_y, 181 int dst_stride_y, 182 uint8* dst_u, 183 int dst_stride_u, 184 uint8* dst_v, 185 int dst_stride_v, 186 int width, 187 int height) { 188 if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { 189 return -1; 190 } 191 // Negative height means invert the image. 
192 if (height < 0) { 193 height = -height; 194 src_y = src_y + (height - 1) * src_stride_y; 195 src_u = src_u + (height - 1) * src_stride_u; 196 src_v = src_v + (height - 1) * src_stride_v; 197 src_stride_y = -src_stride_y; 198 src_stride_u = -src_stride_u; 199 src_stride_v = -src_stride_v; 200 } 201 202 if (dst_y) { 203 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); 204 } 205 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height); 206 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height); 207 return 0; 208 } 209 210 // Copy I400. 211 LIBYUV_API 212 int I400ToI400(const uint8* src_y, 213 int src_stride_y, 214 uint8* dst_y, 215 int dst_stride_y, 216 int width, 217 int height) { 218 if (!src_y || !dst_y || width <= 0 || height == 0) { 219 return -1; 220 } 221 // Negative height means invert the image. 222 if (height < 0) { 223 height = -height; 224 src_y = src_y + (height - 1) * src_stride_y; 225 src_stride_y = -src_stride_y; 226 } 227 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); 228 return 0; 229 } 230 231 // Convert I420 to I400. 232 LIBYUV_API 233 int I420ToI400(const uint8* src_y, 234 int src_stride_y, 235 const uint8* src_u, 236 int src_stride_u, 237 const uint8* src_v, 238 int src_stride_v, 239 uint8* dst_y, 240 int dst_stride_y, 241 int width, 242 int height) { 243 (void)src_u; 244 (void)src_stride_u; 245 (void)src_v; 246 (void)src_stride_v; 247 if (!src_y || !dst_y || width <= 0 || height == 0) { 248 return -1; 249 } 250 // Negative height means invert the image. 251 if (height < 0) { 252 height = -height; 253 src_y = src_y + (height - 1) * src_stride_y; 254 src_stride_y = -src_stride_y; 255 } 256 257 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); 258 return 0; 259 } 260 261 // Support function for NV12 etc UV channels. 262 // Width and height are plane sizes (typically half pixel width). 
// De-interleave an interleaved UV plane (as in NV12/NV21) into separate
// U and V planes. Width/height are the plane dimensions, not image size.
// A negative height writes the destinations bottom-up (vertical flip).
LIBYUV_API
void SplitUVPlane(const uint8* src_uv,
                  int src_stride_uv,
                  uint8* dst_u,
                  int dst_stride_u,
                  uint8* dst_v,
                  int dst_stride_v,
                  int width,
                  int height) {
  int y;
  void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                     int width) = SplitUVRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_u = dst_u + (height - 1) * dst_stride_u;
    dst_v = dst_v + (height - 1) * dst_stride_v;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }
  // Coalesce rows: source holds 2 bytes per pixel (U and V interleaved).
  if (src_stride_uv == width * 2 && dst_stride_u == width &&
      dst_stride_v == width) {
    width *= height;
    height = 1;
    src_stride_uv = dst_stride_u = dst_stride_v = 0;
  }
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_DSPR2)
  // DSPR2 kernel additionally requires 4-byte aligned pointers and strides.
  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_u, 4) &&
      IS_ALIGNED(dst_stride_u, 4) && IS_ALIGNED(dst_v, 4) &&
      IS_ALIGNED(dst_stride_v, 4)) {
    SplitUVRow = SplitUVRow_Any_DSPR2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_DSPR2;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // Split a row of interleaved UV into separate U and V rows.
    SplitUVRow(src_uv, dst_u, dst_v, width);
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
    src_uv += src_stride_uv;
  }
}

// Interleave separate U and V planes into a single UV plane (as in NV12).
// Width/height are the plane dimensions. A negative height writes the
// destination bottom-up (vertical flip).
LIBYUV_API
void MergeUVPlane(const uint8* src_u,
                  int src_stride_u,
                  const uint8* src_v,
                  int src_stride_v,
                  uint8* dst_uv,
                  int dst_stride_uv,
                  int width,
                  int height) {
  int y;
  void (*MergeUVRow)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
                     int width) = MergeUVRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_uv = dst_uv + (height - 1) * dst_stride_uv;
    dst_stride_uv = -dst_stride_uv;
  }
  // Coalesce rows: destination holds 2 bytes per pixel.
  if (src_stride_u == width && src_stride_v == width &&
      dst_stride_uv == width * 2) {
    width *= height;
    height = 1;
    src_stride_u = src_stride_v = dst_stride_uv = 0;
  }
#if defined(HAS_MERGEUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    MergeUVRow = MergeUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow = MergeUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeUVRow = MergeUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      MergeUVRow = MergeUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeUVRow = MergeUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow = MergeUVRow_NEON;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    MergeUVRow = MergeUVRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow = MergeUVRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // Merge a row of U and V into a row of UV.
    MergeUVRow(src_u, src_v, dst_uv, width);
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_uv += dst_stride_uv;
  }
}

// Mirror a plane of data.
403 void MirrorPlane(const uint8* src_y, 404 int src_stride_y, 405 uint8* dst_y, 406 int dst_stride_y, 407 int width, 408 int height) { 409 int y; 410 void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C; 411 // Negative height means invert the image. 412 if (height < 0) { 413 height = -height; 414 src_y = src_y + (height - 1) * src_stride_y; 415 src_stride_y = -src_stride_y; 416 } 417 #if defined(HAS_MIRRORROW_NEON) 418 if (TestCpuFlag(kCpuHasNEON)) { 419 MirrorRow = MirrorRow_Any_NEON; 420 if (IS_ALIGNED(width, 16)) { 421 MirrorRow = MirrorRow_NEON; 422 } 423 } 424 #endif 425 #if defined(HAS_MIRRORROW_SSSE3) 426 if (TestCpuFlag(kCpuHasSSSE3)) { 427 MirrorRow = MirrorRow_Any_SSSE3; 428 if (IS_ALIGNED(width, 16)) { 429 MirrorRow = MirrorRow_SSSE3; 430 } 431 } 432 #endif 433 #if defined(HAS_MIRRORROW_AVX2) 434 if (TestCpuFlag(kCpuHasAVX2)) { 435 MirrorRow = MirrorRow_Any_AVX2; 436 if (IS_ALIGNED(width, 32)) { 437 MirrorRow = MirrorRow_AVX2; 438 } 439 } 440 #endif 441 // TODO(fbarchard): Mirror on mips handle unaligned memory. 442 #if defined(HAS_MIRRORROW_DSPR2) 443 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_y, 4) && 444 IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(dst_y, 4) && 445 IS_ALIGNED(dst_stride_y, 4)) { 446 MirrorRow = MirrorRow_DSPR2; 447 } 448 #endif 449 #if defined(HAS_MIRRORROW_MSA) 450 if (TestCpuFlag(kCpuHasMSA)) { 451 MirrorRow = MirrorRow_Any_MSA; 452 if (IS_ALIGNED(width, 64)) { 453 MirrorRow = MirrorRow_MSA; 454 } 455 } 456 #endif 457 458 // Mirror plane 459 for (y = 0; y < height; ++y) { 460 MirrorRow(src_y, dst_y, width); 461 src_y += src_stride_y; 462 dst_y += dst_stride_y; 463 } 464 } 465 466 // Convert YUY2 to I422. 
// Convert packed YUY2 (Y0 U0 Y1 V0 ...) to planar I422.
// Returns 0 on success, -1 on invalid arguments. A negative height reads
// the source bottom-up (vertical flip).
LIBYUV_API
int YUY2ToI422(const uint8* src_yuy2,
               int src_stride_yuy2,
               uint8* dst_y,
               int dst_stride_y,
               uint8* dst_u,
               int dst_stride_u,
               uint8* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  int y;
  void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
                         int width) = YUY2ToUV422Row_C;
  void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) =
      YUY2ToYRow_C;
  if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    src_stride_yuy2 = -src_stride_yuy2;
  }
  // Coalesce rows. Capped at 32768 pixels so the coalesced width stays
  // within the range the row functions handle.
  if (src_stride_yuy2 == width * 2 && dst_stride_y == width &&
      dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
      width * height <= 32768) {
    width *= height;
    height = 1;
    src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
  }
#if defined(HAS_YUY2TOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
      YUY2ToYRow = YUY2ToYRow_SSE2;
    }
  }
#endif
#if defined(HAS_YUY2TOYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
      YUY2ToYRow = YUY2ToYRow_AVX2;
    }
  }
#endif
#if defined(HAS_YUY2TOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    YUY2ToYRow = YUY2ToYRow_Any_NEON;
    YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      YUY2ToYRow = YUY2ToYRow_NEON;
      YUY2ToUV422Row = YUY2ToUV422Row_NEON;
    }
  }
#endif
#if defined(HAS_YUY2TOYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    YUY2ToYRow = YUY2ToYRow_Any_MSA;
    YUY2ToUV422Row = YUY2ToUV422Row_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      YUY2ToYRow = YUY2ToYRow_MSA;
      YUY2ToUV422Row = YUY2ToUV422Row_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
    YUY2ToYRow(src_yuy2, dst_y, width);
    src_yuy2 += src_stride_yuy2;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}

// Convert packed UYVY (U0 Y0 V0 Y1 ...) to planar I422.
// Returns 0 on success, -1 on invalid arguments. A negative height reads
// the source bottom-up (vertical flip).
LIBYUV_API
int UYVYToI422(const uint8* src_uyvy,
               int src_stride_uyvy,
               uint8* dst_y,
               int dst_stride_y,
               uint8* dst_u,
               int dst_stride_u,
               uint8* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  int y;
  void (*UYVYToUV422Row)(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
                         int width) = UYVYToUV422Row_C;
  void (*UYVYToYRow)(const uint8* src_uyvy, uint8* dst_y, int width) =
      UYVYToYRow_C;
  if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    src_stride_uyvy = -src_stride_uyvy;
  }
  // Coalesce rows. Capped at 32768 pixels so the coalesced width stays
  // within the range the row functions handle.
  if (src_stride_uyvy == width * 2 && dst_stride_y == width &&
      dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
      width * height <= 32768) {
    width *= height;
    height = 1;
    src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
  }
#if defined(HAS_UYVYTOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
    UYVYToYRow = UYVYToYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      UYVYToUV422Row = UYVYToUV422Row_SSE2;
      UYVYToYRow = UYVYToYRow_SSE2;
    }
  }
#endif
#if defined(HAS_UYVYTOYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
    UYVYToYRow = UYVYToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      UYVYToUV422Row = UYVYToUV422Row_AVX2;
      UYVYToYRow = UYVYToYRow_AVX2;
    }
  }
#endif
#if defined(HAS_UYVYTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    UYVYToYRow = UYVYToYRow_Any_NEON;
    UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      UYVYToYRow = UYVYToYRow_NEON;
      UYVYToUV422Row = UYVYToUV422Row_NEON;
    }
  }
#endif
#if defined(HAS_UYVYTOYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    UYVYToYRow = UYVYToYRow_Any_MSA;
    UYVYToUV422Row = UYVYToUV422Row_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      UYVYToYRow = UYVYToYRow_MSA;
      UYVYToUV422Row = UYVYToUV422Row_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
    UYVYToYRow(src_uyvy, dst_y, width);
    src_uyvy += src_stride_uyvy;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}

// Convert YUY2 to Y.
// Extract the Y (luma) plane from packed YUY2.
// Returns 0 on success, -1 on invalid arguments. A negative height reads
// the source bottom-up (vertical flip).
LIBYUV_API
int YUY2ToY(const uint8* src_yuy2,
            int src_stride_yuy2,
            uint8* dst_y,
            int dst_stride_y,
            int width,
            int height) {
  int y;
  void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) =
      YUY2ToYRow_C;
  if (!src_yuy2 || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    src_stride_yuy2 = -src_stride_yuy2;
  }
  // Coalesce rows: YUY2 source holds 2 bytes per pixel.
  if (src_stride_yuy2 == width * 2 && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_yuy2 = dst_stride_y = 0;
  }
#if defined(HAS_YUY2TOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      YUY2ToYRow = YUY2ToYRow_SSE2;
    }
  }
#endif
#if defined(HAS_YUY2TOYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      YUY2ToYRow = YUY2ToYRow_AVX2;
    }
  }
#endif
#if defined(HAS_YUY2TOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    YUY2ToYRow = YUY2ToYRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      YUY2ToYRow = YUY2ToYRow_NEON;
    }
  }
#endif
#if defined(HAS_YUY2TOYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    YUY2ToYRow = YUY2ToYRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      YUY2ToYRow = YUY2ToYRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    YUY2ToYRow(src_yuy2, dst_y, width);
    src_yuy2 += src_stride_yuy2;
    dst_y += dst_stride_y;
  }
  return 0;
}

// Mirror I400 with optional flipping
LIBYUV_API
int I400Mirror(const uint8* src_y,
               int src_stride_y,
               uint8* dst_y,
               int dst_stride_y,
               int width,
               int height) {
  if (!src_y || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }

  MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  return 0;
}

// Mirror I420 with optional flipping
LIBYUV_API
int I420Mirror(const uint8* src_y,
               int src_stride_y,
               const uint8* src_u,
               int src_stride_u,
               const uint8* src_v,
               int src_stride_v,
               uint8* dst_y,
               int dst_stride_y,
               uint8* dst_u,
               int dst_stride_u,
               uint8* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  // I420 chroma planes are half width and half height (rounded up).
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v || width <= 0 ||
      height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (halfheight - 1) * src_stride_u;
    src_v = src_v + (halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  // The luma plane is optional; a NULL dst_y mirrors chroma only.
  if (dst_y) {
    MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
  MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
  return 0;
}

// ARGB mirror: horizontally flip 4-byte ARGB pixels row by row.
// A negative height additionally flips vertically.
LIBYUV_API
int ARGBMirror(const uint8* src_argb,
               int src_stride_argb,
               uint8* dst_argb,
               int dst_stride_argb,
               int width,
               int height) {
  int y;
  void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
      ARGBMirrorRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
#if defined(HAS_ARGBMIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
    if (IS_ALIGNED(width, 4)) {
      ARGBMirrorRow = ARGBMirrorRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBMirrorRow = ARGBMirrorRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBMirrorRow = ARGBMirrorRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      ARGBMirrorRow = ARGBMirrorRow_MSA;
    }
  }
#endif

  // Mirror plane
  for (y = 0; y < height; ++y) {
    ARGBMirrorRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Get a blender optimized for the CPU and pixel count.
// As there are 6 blenders to choose from, the caller should try to use
// the same blend function for all pixels if possible.
LIBYUV_API
ARGBBlendRow GetARGBBlend() {
  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
                       uint8* dst_argb, int width) = ARGBBlendRow_C;
#if defined(HAS_ARGBBLENDROW_SSSE3)
  // SSSE3 is preferred; return immediately so NEON is not considered.
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBBlendRow = ARGBBlendRow_SSSE3;
    return ARGBBlendRow;
  }
#endif
#if defined(HAS_ARGBBLENDROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBBlendRow = ARGBBlendRow_NEON;
  }
#endif
  return ARGBBlendRow;
}

// Alpha Blend 2 ARGB images and store to destination.
// Alpha-blend two ARGB images (src_argb0 over src_argb1, using src0's
// per-pixel alpha) and store to destination. Returns 0 on success, -1 on
// invalid arguments. A negative height writes the destination bottom-up.
LIBYUV_API
int ARGBBlend(const uint8* src_argb0,
              int src_stride_argb0,
              const uint8* src_argb1,
              int src_stride_argb1,
              uint8* dst_argb,
              int dst_stride_argb,
              int width,
              int height) {
  int y;
  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
                       uint8* dst_argb, int width) = GetARGBBlend();
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: ARGB is 4 bytes per pixel.
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }

  for (y = 0; y < height; ++y) {
    ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Alpha-blend a plane: dst = src0 * alpha + src1 * (255 - alpha), per byte,
// using a separate alpha plane. Returns 0 on success, -1 on invalid
// arguments. A negative height writes the destination bottom-up.
LIBYUV_API
int BlendPlane(const uint8* src_y0,
               int src_stride_y0,
               const uint8* src_y1,
               int src_stride_y1,
               const uint8* alpha,
               int alpha_stride,
               uint8* dst_y,
               int dst_stride_y,
               int width,
               int height) {
  int y;
  void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
                        const uint8* alpha, uint8* dst, int width) =
      BlendPlaneRow_C;
  if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }

  // Coalesce rows for Y plane.
  if (src_stride_y0 == width && src_stride_y1 == width &&
      alpha_stride == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0;
  }

#if defined(HAS_BLENDPLANEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      BlendPlaneRow = BlendPlaneRow_SSSE3;
    }
  }
#endif
#if defined(HAS_BLENDPLANEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    BlendPlaneRow = BlendPlaneRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      BlendPlaneRow = BlendPlaneRow_AVX2;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
    src_y0 += src_stride_y0;
    src_y1 += src_stride_y1;
    alpha += alpha_stride;
    dst_y += dst_stride_y;
  }
  return 0;
}

#define MAXTWIDTH 2048
// Alpha Blend YUV images and store to destination.
// The full-resolution alpha plane is box-subsampled 2x2 on the fly to blend
// the half-resolution U and V planes. The Y plane blend (including the
// negative-height flip) is delegated to BlendPlane.
// NOTE(review): only dst_y is adjusted for negative height here; U/V are
// processed top-down regardless — confirm intended.
LIBYUV_API
int I420Blend(const uint8* src_y0,
              int src_stride_y0,
              const uint8* src_u0,
              int src_stride_u0,
              const uint8* src_v0,
              int src_stride_v0,
              const uint8* src_y1,
              int src_stride_y1,
              const uint8* src_u1,
              int src_stride_u1,
              const uint8* src_v1,
              int src_stride_v1,
              const uint8* alpha,
              int alpha_stride,
              uint8* dst_y,
              int dst_stride_y,
              uint8* dst_u,
              int dst_stride_u,
              uint8* dst_v,
              int dst_stride_v,
              int width,
              int height) {
  int y;
  // Half width/height for UV.
  int halfwidth = (width + 1) >> 1;
  void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
                        const uint8* alpha, uint8* dst, int width) =
      BlendPlaneRow_C;
  void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
  if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
      !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }

  // Blend Y plane.
  BlendPlane(src_y0, src_stride_y0, src_y1, src_stride_y1, alpha, alpha_stride,
             dst_y, dst_stride_y, width, height);

#if defined(HAS_BLENDPLANEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
    if (IS_ALIGNED(halfwidth, 8)) {
      BlendPlaneRow = BlendPlaneRow_SSSE3;
    }
  }
#endif
#if defined(HAS_BLENDPLANEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    BlendPlaneRow = BlendPlaneRow_Any_AVX2;
    if (IS_ALIGNED(halfwidth, 32)) {
      BlendPlaneRow = BlendPlaneRow_AVX2;
    }
  }
#endif
  // Odd widths need the Odd_ subsampler, which replicates the last column.
  if (!IS_ALIGNED(width, 2)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
  }
#if defined(HAS_SCALEROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_NEON;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
      if (IS_ALIGNED(halfwidth, 16)) {
        ScaleRowDown2 = ScaleRowDown2Box_NEON;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_SSSE3;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3;
      if (IS_ALIGNED(halfwidth, 16)) {
        ScaleRowDown2 = ScaleRowDown2Box_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_AVX2;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
      if (IS_ALIGNED(halfwidth, 32)) {
        ScaleRowDown2 = ScaleRowDown2Box_AVX2;
      }
    }
  }
#endif

  // Row buffer for intermediate alpha pixels.
  align_buffer_64(halfalpha, halfwidth);
  for (y = 0; y < height; y += 2) {
    // last row of odd height image use 1 row of alpha instead of 2.
    if (y == (height - 1)) {
      alpha_stride = 0;
    }
    // Subsample 2 rows of full-resolution alpha to half width.
    ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
    alpha += alpha_stride * 2;
    BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
    BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
    src_u0 += src_stride_u0;
    src_u1 += src_stride_u1;
    dst_u += dst_stride_u;
    src_v0 += src_stride_v0;
    src_v1 += src_stride_v1;
    dst_v += dst_stride_v;
  }
  free_aligned_buffer_64(halfalpha);
  return 0;
}

// Multiply 2 ARGB images and store to destination.
// Returns 0 on success, -1 on invalid arguments. A negative height writes
// the destination bottom-up.
LIBYUV_API
int ARGBMultiply(const uint8* src_argb0,
                 int src_stride_argb0,
                 const uint8* src_argb1,
                 int src_stride_argb1,
                 uint8* dst_argb,
                 int dst_stride_argb,
                 int width,
                 int height) {
  int y;
  void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
                          int width) = ARGBMultiplyRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: ARGB is 4 bytes per pixel.
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBMULTIPLYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBMultiplyRow = ARGBMultiplyRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_MSA;
    if (IS_ALIGNED(width, 4)) {
      ARGBMultiplyRow = ARGBMultiplyRow_MSA;
    }
  }
#endif

  // Multiply plane
  for (y = 0; y < height; ++y) {
    ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Add 2 ARGB images and store to destination.
// Returns 0 on success, -1 on invalid arguments. A negative height writes
// the destination bottom-up.
LIBYUV_API
int ARGBAdd(const uint8* src_argb0,
            int src_stride_argb0,
            const uint8* src_argb1,
            int src_stride_argb1,
            uint8* dst_argb,
            int dst_stride_argb,
            int width,
            int height) {
  int y;
  void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
                     int width) = ARGBAddRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: ARGB is 4 bytes per pixel.
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
// Visual Studio (non-clang) build uses the exact-width SSE2 kernel only,
// with no Any_ fallback; other compilers get the Any_ dispatch below.
#if defined(HAS_ARGBADDROW_SSE2) && (defined(_MSC_VER) && !defined(__clang__))
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBAddRow = ARGBAddRow_SSE2;
  }
#endif
#if defined(HAS_ARGBADDROW_SSE2) && !(defined(_MSC_VER) && !defined(__clang__))
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBAddRow = ARGBAddRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBAddRow = ARGBAddRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBAddRow = ARGBAddRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAddRow = ARGBAddRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBAddRow = ARGBAddRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_MSA;
    }
  }
#endif

  // Add plane
  for (y = 0; y < height; ++y) {
    ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Subtract 2 ARGB images and store to destination.
// Subtracts src_argb1 from src_argb0 per pixel and stores to dst_argb.
// Strides are in bytes. A negative height writes the output bottom-up.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBSubtract(const uint8* src_argb0,
                 int src_stride_argb0,
                 const uint8* src_argb1,
                 int src_stride_argb1,
                 uint8* dst_argb,
                 int dst_stride_argb,
                 int width,
                 int height) {
  int y;
  // Row worker; C fallback, upgraded by the SIMD dispatch below.
  void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
                          int width) = ARGBSubtractRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows when all images are contiguous.
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBSUBTRACTROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBSubtractRow = ARGBSubtractRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_MSA;
    }
  }
#endif

  // Subtract plane
  for (y = 0; y < height; ++y) {
    ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Convert I422 to RGBA with matrix.
// Shared implementation behind I422ToRGBA and I422ToBGRA; yuvconstants
// selects the YUV-to-RGB conversion matrix.
static int I422ToRGBAMatrix(const uint8* src_y,
                            int src_stride_y,
                            const uint8* src_u,
                            int src_stride_u,
                            const uint8* src_v,
                            int src_stride_v,
                            uint8* dst_rgba,
                            int dst_stride_rgba,
                            const struct YuvConstants* yuvconstants,
                            int width,
                            int height) {
  int y;
  void (*I422ToRGBARow)(const uint8* y_buf, const uint8* u_buf,
                        const uint8* v_buf, uint8* rgb_buf,
                        const struct YuvConstants* yuvconstants, int width) =
      I422ToRGBARow_C;
  if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
    dst_stride_rgba = -dst_stride_rgba;
  }
#if defined(HAS_I422TORGBAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I422ToRGBARow = I422ToRGBARow_SSSE3;
    }
  }
#endif
#if defined(HAS_I422TORGBAROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I422ToRGBARow = I422ToRGBARow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      I422ToRGBARow = I422ToRGBARow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TORGBAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToRGBARow = I422ToRGBARow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      I422ToRGBARow = I422ToRGBARow_NEON;
    }
  }
#endif
#if defined(HAS_I422TORGBAROW_DSPR2)
  // DSPR2 requires pointer and stride alignment on all three planes.
  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
    I422ToRGBARow = I422ToRGBARow_DSPR2;
  }
#endif
#if defined(HAS_I422TORGBAROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    I422ToRGBARow = I422ToRGBARow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      I422ToRGBARow = I422ToRGBARow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
    dst_rgba += dst_stride_rgba;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}

// Convert I422 to RGBA using the BT.601 matrix.
LIBYUV_API
int I422ToRGBA(const uint8* src_y,
               int src_stride_y,
               const uint8* src_u,
               int src_stride_u,
               const uint8* src_v,
               int src_stride_v,
               uint8* dst_rgba,
               int dst_stride_rgba,
               int width,
               int height) {
  return I422ToRGBAMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
                          src_stride_v, dst_rgba, dst_stride_rgba,
                          &kYuvI601Constants, width, height);
}

// Convert I422 to BGRA. Implemented by swapping the U/V planes and using
// the YVU matrix with the RGBA converter.
LIBYUV_API
int I422ToBGRA(const uint8* src_y,
               int src_stride_y,
               const uint8* src_u,
               int src_stride_u,
               const uint8* src_v,
               int src_stride_v,
               uint8* dst_bgra,
               int dst_stride_bgra,
               int width,
               int height) {
  return I422ToRGBAMatrix(src_y, src_stride_y, src_v,
                          src_stride_v,  // Swap U and V
                          src_u, src_stride_u, dst_bgra, dst_stride_bgra,
                          &kYvuI601Constants,  // Use Yvu matrix
                          width, height);
}

// Convert NV12 to RGB565.
1412 LIBYUV_API 1413 int NV12ToRGB565(const uint8* src_y, 1414 int src_stride_y, 1415 const uint8* src_uv, 1416 int src_stride_uv, 1417 uint8* dst_rgb565, 1418 int dst_stride_rgb565, 1419 int width, 1420 int height) { 1421 int y; 1422 void (*NV12ToRGB565Row)( 1423 const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf, 1424 const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C; 1425 if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) { 1426 return -1; 1427 } 1428 // Negative height means invert the image. 1429 if (height < 0) { 1430 height = -height; 1431 dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; 1432 dst_stride_rgb565 = -dst_stride_rgb565; 1433 } 1434 #if defined(HAS_NV12TORGB565ROW_SSSE3) 1435 if (TestCpuFlag(kCpuHasSSSE3)) { 1436 NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3; 1437 if (IS_ALIGNED(width, 8)) { 1438 NV12ToRGB565Row = NV12ToRGB565Row_SSSE3; 1439 } 1440 } 1441 #endif 1442 #if defined(HAS_NV12TORGB565ROW_AVX2) 1443 if (TestCpuFlag(kCpuHasAVX2)) { 1444 NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2; 1445 if (IS_ALIGNED(width, 16)) { 1446 NV12ToRGB565Row = NV12ToRGB565Row_AVX2; 1447 } 1448 } 1449 #endif 1450 #if defined(HAS_NV12TORGB565ROW_NEON) 1451 if (TestCpuFlag(kCpuHasNEON)) { 1452 NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON; 1453 if (IS_ALIGNED(width, 8)) { 1454 NV12ToRGB565Row = NV12ToRGB565Row_NEON; 1455 } 1456 } 1457 #endif 1458 #if defined(HAS_NV12TORGB565ROW_MSA) 1459 if (TestCpuFlag(kCpuHasMSA)) { 1460 NV12ToRGB565Row = NV12ToRGB565Row_Any_MSA; 1461 if (IS_ALIGNED(width, 8)) { 1462 NV12ToRGB565Row = NV12ToRGB565Row_MSA; 1463 } 1464 } 1465 #endif 1466 1467 for (y = 0; y < height; ++y) { 1468 NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvI601Constants, width); 1469 dst_rgb565 += dst_stride_rgb565; 1470 src_y += src_stride_y; 1471 if (y & 1) { 1472 src_uv += src_stride_uv; 1473 } 1474 } 1475 return 0; 1476 } 1477 1478 // Convert RAW to RGB24. 
// Converts RAW (BGR byte order) to RGB24 by swapping the R and B channels.
// Strides are in bytes. A negative height reads the source bottom-up.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int RAWToRGB24(const uint8* src_raw,
               int src_stride_raw,
               uint8* dst_rgb24,
               int dst_stride_rgb24,
               int width,
               int height) {
  int y;
  void (*RAWToRGB24Row)(const uint8* src_rgb, uint8* dst_rgb24, int width) =
      RAWToRGB24Row_C;
  if (!src_raw || !dst_rgb24 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_raw = src_raw + (height - 1) * src_stride_raw;
    src_stride_raw = -src_stride_raw;
  }
  // Coalesce rows (3 bytes per pixel).
  if (src_stride_raw == width * 3 && dst_stride_rgb24 == width * 3) {
    width *= height;
    height = 1;
    src_stride_raw = dst_stride_rgb24 = 0;
  }
#if defined(HAS_RAWTORGB24ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    RAWToRGB24Row = RAWToRGB24Row_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      RAWToRGB24Row = RAWToRGB24Row_SSSE3;
    }
  }
#endif
#if defined(HAS_RAWTORGB24ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    RAWToRGB24Row = RAWToRGB24Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RAWToRGB24Row = RAWToRGB24Row_NEON;
    }
  }
#endif
#if defined(HAS_RAWTORGB24ROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    RAWToRGB24Row = RAWToRGB24Row_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      RAWToRGB24Row = RAWToRGB24Row_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    RAWToRGB24Row(src_raw, dst_rgb24, width);
    src_raw += src_stride_raw;
    dst_rgb24 += dst_stride_rgb24;
  }
  return 0;
}

// Fills a plane of bytes with a constant value.
// Only the low 8 bits of value are used (SetRow takes a uint8 value).
// A negative height writes the plane bottom-up.
LIBYUV_API
void SetPlane(uint8* dst_y,
              int dst_stride_y,
              int width,
              int height,
              uint32 value) {
  int y;
  void (*SetRow)(uint8 * dst, uint8 value, int width) = SetRow_C;
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows.
  if (dst_stride_y == width) {
    width *= height;
    height = 1;
    dst_stride_y = 0;
  }
#if defined(HAS_SETROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SetRow = SetRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SetRow = SetRow_NEON;
    }
  }
#endif
#if defined(HAS_SETROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    SetRow = SetRow_Any_X86;
    if (IS_ALIGNED(width, 4)) {
      SetRow = SetRow_X86;
    }
  }
#endif
#if defined(HAS_SETROW_ERMS)
  if (TestCpuFlag(kCpuHasERMS)) {
    SetRow = SetRow_ERMS;
  }
#endif

  // Set plane
  for (y = 0; y < height; ++y) {
    SetRow(dst_y, value, width);
    dst_y += dst_stride_y;
  }
}

// Draw a rectangle into I420.
// (x, y) is the top-left corner in the Y plane; U/V coordinates are halved.
// Returns 0 on success, -1 on invalid arguments or out-of-range values.
LIBYUV_API
int I420Rect(uint8* dst_y,
             int dst_stride_y,
             uint8* dst_u,
             int dst_stride_u,
             uint8* dst_v,
             int dst_stride_v,
             int x,
             int y,
             int width,
             int height,
             int value_y,
             int value_u,
             int value_v) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  // NOTE(review): start pointers are computed before the NULL check below;
  // pointer arithmetic on a NULL plane is technically UB — confirm callers
  // never pass NULL with nonzero x/y.
  uint8* start_y = dst_y + y * dst_stride_y + x;
  uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
  uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
  if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || x < 0 ||
      y < 0 || value_y < 0 || value_y > 255 || value_u < 0 || value_u > 255 ||
      value_v < 0 || value_v > 255) {
    return -1;
  }

  SetPlane(start_y, dst_stride_y, width, height, value_y);
  SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
  SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
  return 0;
}

// Draw a rectangle into ARGB.
// value is one packed ARGB pixel. A negative height writes bottom-up.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBRect(uint8* dst_argb,
             int dst_stride_argb,
             int dst_x,
             int dst_y,
             int width,
             int height,
             uint32 value) {
  int y;
  void (*ARGBSetRow)(uint8 * dst_argb, uint32 value, int width) = ARGBSetRow_C;
  if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Offset to the requested rectangle (applied after any vertical flip).
  dst_argb += dst_y * dst_stride_argb + dst_x * 4;
  // Coalesce rows.
  if (dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    dst_stride_argb = 0;
  }

#if defined(HAS_ARGBSETROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBSetRow = ARGBSetRow_Any_NEON;
    if (IS_ALIGNED(width, 4)) {
      ARGBSetRow = ARGBSetRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBSETROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    ARGBSetRow = ARGBSetRow_X86;
  }
#endif
#if defined(HAS_ARGBSETROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBSetRow = ARGBSetRow_Any_MSA;
    if (IS_ALIGNED(width, 4)) {
      ARGBSetRow = ARGBSetRow_MSA;
    }
  }
#endif

  // Set plane
  for (y = 0; y < height; ++y) {
    ARGBSetRow(dst_argb, value, width);
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Convert unattenuated ARGB to preattenuated ARGB.
// An unattenuated ARGB alpha blend uses the formula
// p = a * f + (1 - a) * b
// where
//   p is output pixel
//   f is foreground pixel
//   b is background pixel
//   a is alpha value from foreground pixel
// A preattenuated ARGB alpha blend uses the formula
// p = f + (1 - a) * b
// where
//   f is foreground pixel premultiplied by alpha

// Premultiplies each pixel's RGB channels by its alpha.
// Strides are in bytes. A negative height reads the source bottom-up.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBAttenuate(const uint8* src_argb,
                  int src_stride_argb,
                  uint8* dst_argb,
                  int dst_stride_argb,
                  int width,
                  int height) {
  int y;
  void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb, int width) =
      ARGBAttenuateRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 4)) {
      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBAttenuateRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Convert preattenuated ARGB to unattenuated ARGB (divide RGB by alpha).
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBUnattenuate(const uint8* src_argb,
                    int src_stride_argb,
                    uint8* dst_argb,
                    int dst_stride_argb,
                    int width,
                    int height) {
  int y;
  void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
                             int width) = ARGBUnattenuateRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
    }
  }
#endif
  // TODO(fbarchard): Neon version.

  for (y = 0; y < height; ++y) {
    ARGBUnattenuateRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Convert ARGB to Grayed ARGB.
// Converts ARGB to grayscale ARGB (alpha preserved by the row function's
// contract — see ARGBGrayRow_* in row.h). A negative height reads the
// source bottom-up. Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBGrayTo(const uint8* src_argb,
               int src_stride_argb,
               uint8* dst_argb,
               int dst_stride_argb,
               int width,
               int height) {
  int y;
  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, int width) =
      ARGBGrayRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
  // SIMD rows require width aligned to 8 pixels; no "Any" variants here.
#if defined(HAS_ARGBGRAYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_SSSE3;
  }
#endif
#if defined(HAS_ARGBGRAYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_NEON;
  }
#endif
#if defined(HAS_ARGBGRAYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_MSA;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBGrayRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Make a rectangle of ARGB gray scale, in place.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBGray(uint8* dst_argb,
             int dst_stride_argb,
             int dst_x,
             int dst_y,
             int width,
             int height) {
  int y;
  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, int width) =
      ARGBGrayRow_C;
  // NOTE(review): dst is computed before the NULL check below; pointer
  // arithmetic on NULL is technically UB — confirm callers pass valid
  // pointers.
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
    return -1;
  }
  // Coalesce rows.
  if (dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    dst_stride_argb = 0;
  }
#if defined(HAS_ARGBGRAYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_SSSE3;
  }
#endif
#if defined(HAS_ARGBGRAYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_NEON;
  }
#endif
#if defined(HAS_ARGBGRAYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_MSA;
  }
#endif

  for (y = 0; y < height; ++y) {
    // In-place: source and destination are the same row.
    ARGBGrayRow(dst, dst, width);
    dst += dst_stride_argb;
  }
  return 0;
}

// Make a rectangle of ARGB Sepia tone, in place.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBSepia(uint8* dst_argb,
              int dst_stride_argb,
              int dst_x,
              int dst_y,
              int width,
              int height) {
  int y;
  void (*ARGBSepiaRow)(uint8 * dst_argb, int width) = ARGBSepiaRow_C;
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
    return -1;
  }
  // Coalesce rows.
  if (dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    dst_stride_argb = 0;
  }
#if defined(HAS_ARGBSEPIAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    ARGBSepiaRow = ARGBSepiaRow_SSSE3;
  }
#endif
#if defined(HAS_ARGBSEPIAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBSepiaRow = ARGBSepiaRow_NEON;
  }
#endif
#if defined(HAS_ARGBSEPIAROW_MSA)
  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
    ARGBSepiaRow = ARGBSepiaRow_MSA;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBSepiaRow(dst, width);
    dst += dst_stride_argb;
  }
  return 0;
}

// Apply a 4x4 matrix to each ARGB pixel.
// Note: Normally for shading, but can be used to swizzle or invert.
// matrix_argb is 16 signed bytes (4x4, row-major). A negative height reads
// the source bottom-up. Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBColorMatrix(const uint8* src_argb,
                    int src_stride_argb,
                    uint8* dst_argb,
                    int dst_stride_argb,
                    const int8* matrix_argb,
                    int width,
                    int height) {
  int y;
  void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
                             const int8* matrix_argb, int width) =
      ARGBColorMatrixRow_C;
  if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
  }
#endif
#if defined(HAS_ARGBCOLORMATRIXROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
  }
#endif
  for (y = 0; y < height; ++y) {
    ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Apply a 4x3 matrix to each ARGB pixel, in place.
// Deprecated: wraps ARGBColorMatrix after widening the matrix to 4x4.
LIBYUV_API
int RGBColorMatrix(uint8* dst_argb,
                   int dst_stride_argb,
                   const int8* matrix_rgb,
                   int dst_x,
                   int dst_y,
                   int width,
                   int height) {
  SIMD_ALIGNED(int8 matrix_argb[16]);
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || dst_x < 0 ||
      dst_y < 0) {
    return -1;
  }

  // Convert 4x3 7 bit matrix to 4x4 6 bit matrix (halve each coefficient).
  matrix_argb[0] = matrix_rgb[0] / 2;
  matrix_argb[1] = matrix_rgb[1] / 2;
  matrix_argb[2] = matrix_rgb[2] / 2;
  matrix_argb[3] = matrix_rgb[3] / 2;
  matrix_argb[4] = matrix_rgb[4] / 2;
  matrix_argb[5] = matrix_rgb[5] / 2;
  matrix_argb[6] = matrix_rgb[6] / 2;
  matrix_argb[7] = matrix_rgb[7] / 2;
  matrix_argb[8] = matrix_rgb[8] / 2;
  matrix_argb[9] = matrix_rgb[9] / 2;
  matrix_argb[10] = matrix_rgb[10] / 2;
  matrix_argb[11] = matrix_rgb[11] / 2;
  // Alpha row: pass alpha through unchanged.
  matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
  matrix_argb[15] = 64;  // 1.0

  return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb, dst,
                         dst_stride_argb, &matrix_argb[0], width, height);
}

// Apply a color table to each ARGB pixel, in place.
// Table contains 256 ARGB values.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBColorTable(uint8* dst_argb,
                   int dst_stride_argb,
                   const uint8* table_argb,
                   int dst_x,
                   int dst_y,
                   int width,
                   int height) {
  int y;
  void (*ARGBColorTableRow)(uint8 * dst_argb, const uint8* table_argb,
                            int width) = ARGBColorTableRow_C;
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
      dst_y < 0) {
    return -1;
  }
  // Coalesce rows.
  if (dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    dst_stride_argb = 0;
  }
#if defined(HAS_ARGBCOLORTABLEROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    ARGBColorTableRow = ARGBColorTableRow_X86;
  }
#endif
  for (y = 0; y < height; ++y) {
    ARGBColorTableRow(dst, table_argb, width);
    dst += dst_stride_argb;
  }
  return 0;
}

// Apply a color table to each ARGB pixel but preserve destination alpha.
// Table contains 256 ARGB values.
2060 LIBYUV_API 2061 int RGBColorTable(uint8* dst_argb, 2062 int dst_stride_argb, 2063 const uint8* table_argb, 2064 int dst_x, 2065 int dst_y, 2066 int width, 2067 int height) { 2068 int y; 2069 void (*RGBColorTableRow)(uint8 * dst_argb, const uint8* table_argb, 2070 int width) = RGBColorTableRow_C; 2071 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 2072 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 || 2073 dst_y < 0) { 2074 return -1; 2075 } 2076 // Coalesce rows. 2077 if (dst_stride_argb == width * 4) { 2078 width *= height; 2079 height = 1; 2080 dst_stride_argb = 0; 2081 } 2082 #if defined(HAS_RGBCOLORTABLEROW_X86) 2083 if (TestCpuFlag(kCpuHasX86)) { 2084 RGBColorTableRow = RGBColorTableRow_X86; 2085 } 2086 #endif 2087 for (y = 0; y < height; ++y) { 2088 RGBColorTableRow(dst, table_argb, width); 2089 dst += dst_stride_argb; 2090 } 2091 return 0; 2092 } 2093 2094 // ARGBQuantize is used to posterize art. 2095 // e.g. rgb / qvalue * qvalue + qvalue / 2 2096 // But the low levels implement efficiently with 3 parameters, and could be 2097 // used for other high level operations. 2098 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; 2099 // where scale is 1 / interval_size as a fixed point value. 2100 // The divide is replaces with a multiply by reciprocal fixed point multiply. 2101 // Caveat - although SSE2 saturates, the C function does not and should be used 2102 // with care if doing anything but quantization. 
2103 LIBYUV_API 2104 int ARGBQuantize(uint8* dst_argb, 2105 int dst_stride_argb, 2106 int scale, 2107 int interval_size, 2108 int interval_offset, 2109 int dst_x, 2110 int dst_y, 2111 int width, 2112 int height) { 2113 int y; 2114 void (*ARGBQuantizeRow)(uint8 * dst_argb, int scale, int interval_size, 2115 int interval_offset, int width) = ARGBQuantizeRow_C; 2116 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 2117 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 || 2118 interval_size < 1 || interval_size > 255) { 2119 return -1; 2120 } 2121 // Coalesce rows. 2122 if (dst_stride_argb == width * 4) { 2123 width *= height; 2124 height = 1; 2125 dst_stride_argb = 0; 2126 } 2127 #if defined(HAS_ARGBQUANTIZEROW_SSE2) 2128 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) { 2129 ARGBQuantizeRow = ARGBQuantizeRow_SSE2; 2130 } 2131 #endif 2132 #if defined(HAS_ARGBQUANTIZEROW_NEON) 2133 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 2134 ARGBQuantizeRow = ARGBQuantizeRow_NEON; 2135 } 2136 #endif 2137 for (y = 0; y < height; ++y) { 2138 ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width); 2139 dst += dst_stride_argb; 2140 } 2141 return 0; 2142 } 2143 2144 // Computes table of cumulative sum for image where the value is the sum 2145 // of all values above and to the left of the entry. Used by ARGBBlur. 
2146 LIBYUV_API 2147 int ARGBComputeCumulativeSum(const uint8* src_argb, 2148 int src_stride_argb, 2149 int32* dst_cumsum, 2150 int dst_stride32_cumsum, 2151 int width, 2152 int height) { 2153 int y; 2154 void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum, 2155 const int32* previous_cumsum, int width) = 2156 ComputeCumulativeSumRow_C; 2157 int32* previous_cumsum = dst_cumsum; 2158 if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) { 2159 return -1; 2160 } 2161 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2) 2162 if (TestCpuFlag(kCpuHasSSE2)) { 2163 ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; 2164 } 2165 #endif 2166 memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel. 2167 for (y = 0; y < height; ++y) { 2168 ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width); 2169 previous_cumsum = dst_cumsum; 2170 dst_cumsum += dst_stride32_cumsum; 2171 src_argb += src_stride_argb; 2172 } 2173 return 0; 2174 } 2175 2176 // Blur ARGB image. 2177 // Caller should allocate CumulativeSum table of width * height * 16 bytes 2178 // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory 2179 // as the buffer is treated as circular. 
// Box-blurs an ARGB image using a summed-area table (see
// ARGBComputeCumulativeSum). dst_cumsum is caller-allocated scratch used as
// a circular buffer of cumulative-sum rows; dst_stride32_cumsum is in int32
// units. radius is clamped to the image size. A negative height reads the
// source bottom-up. Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBBlur(const uint8* src_argb,
             int src_stride_argb,
             uint8* dst_argb,
             int dst_stride_argb,
             int32* dst_cumsum,
             int dst_stride32_cumsum,
             int width,
             int height,
             int radius) {
  int y;
  void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
                                  const int32* previous_cumsum, int width) =
      ComputeCumulativeSumRow_C;
  void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
                                    int width, int area, uint8* dst,
                                    int count) = CumulativeSumToAverageRow_C;
  int32* cumsum_bot_row;
  int32* max_cumsum_bot_row;
  int32* cumsum_top_row;

  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Clamp the radius to the image dimensions; a degenerate radius is an
  // error.
  if (radius > height) {
    radius = height;
  }
  if (radius > (width / 2 - 1)) {
    radius = width / 2 - 1;
  }
  if (radius <= 0) {
    return -1;
  }
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
    CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
  }
#endif
  // Compute enough CumulativeSum for first row to be blurred. After this
  // one row of CumulativeSum is updated at a time.
  ARGBComputeCumulativeSum(src_argb, src_stride_argb, dst_cumsum,
                           dst_stride32_cumsum, width, radius);

  src_argb = src_argb + radius * src_stride_argb;
  cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];

  // One past the last usable cumsum row; pointers wrap back to dst_cumsum.
  max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
  cumsum_top_row = &dst_cumsum[0];

  for (y = 0; y < height; ++y) {
    // Vertical extent of the box, clipped to the image.
    int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
    int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
    int area = radius * (bot_y - top_y);
    int boxwidth = radius * 4;
    int x;
    int n;

    // Increment cumsum_top_row pointer with circular buffer wrap around.
    if (top_y) {
      cumsum_top_row += dst_stride32_cumsum;
      if (cumsum_top_row >= max_cumsum_bot_row) {
        cumsum_top_row = dst_cumsum;
      }
    }
    // Increment cumsum_bot_row pointer with circular buffer wrap around and
    // then fill in a row of CumulativeSum.
    if ((y + radius) < height) {
      const int32* prev_cumsum_bot_row = cumsum_bot_row;
      cumsum_bot_row += dst_stride32_cumsum;
      if (cumsum_bot_row >= max_cumsum_bot_row) {
        cumsum_bot_row = dst_cumsum;
      }
      ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
                              width);
      src_argb += src_stride_argb;
    }

    // Left clipped: the box grows as it moves away from the left edge.
    for (x = 0; x < radius + 1; ++x) {
      CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
                                &dst_argb[x * 4], 1);
      area += (bot_y - top_y);
      boxwidth += 4;
    }

    // Middle unclipped: full-width box, processed in one call.
    n = (width - 1) - radius - x + 1;
    CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
                              &dst_argb[x * 4], n);

    // Right clipped: the box shrinks as it approaches the right edge.
    for (x += n; x <= width - 1; ++x) {
      area -= (bot_y - top_y);
      boxwidth -= 4;
      CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
                                cumsum_bot_row + (x - radius - 1) * 4, boxwidth,
                                area, &dst_argb[x * 4], 1);
    }
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Multiply ARGB image by a specified ARGB value.
2290 LIBYUV_API 2291 int ARGBShade(const uint8* src_argb, 2292 int src_stride_argb, 2293 uint8* dst_argb, 2294 int dst_stride_argb, 2295 int width, 2296 int height, 2297 uint32 value) { 2298 int y; 2299 void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb, int width, 2300 uint32 value) = ARGBShadeRow_C; 2301 if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) { 2302 return -1; 2303 } 2304 if (height < 0) { 2305 height = -height; 2306 src_argb = src_argb + (height - 1) * src_stride_argb; 2307 src_stride_argb = -src_stride_argb; 2308 } 2309 // Coalesce rows. 2310 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { 2311 width *= height; 2312 height = 1; 2313 src_stride_argb = dst_stride_argb = 0; 2314 } 2315 #if defined(HAS_ARGBSHADEROW_SSE2) 2316 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) { 2317 ARGBShadeRow = ARGBShadeRow_SSE2; 2318 } 2319 #endif 2320 #if defined(HAS_ARGBSHADEROW_NEON) 2321 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 2322 ARGBShadeRow = ARGBShadeRow_NEON; 2323 } 2324 #endif 2325 #if defined(HAS_ARGBSHADEROW_MSA) 2326 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 4)) { 2327 ARGBShadeRow = ARGBShadeRow_MSA; 2328 } 2329 #endif 2330 2331 for (y = 0; y < height; ++y) { 2332 ARGBShadeRow(src_argb, dst_argb, width, value); 2333 src_argb += src_stride_argb; 2334 dst_argb += dst_stride_argb; 2335 } 2336 return 0; 2337 } 2338 2339 // Interpolate 2 planes by specified amount (0 to 255). 
// Blend two planes into dst by source_y_fraction `interpolation`
// (0 to 255; the callers pass 128 for a half/half average).
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int InterpolatePlane(const uint8* src0,
                     int src_stride0,
                     const uint8* src1,
                     int src_stride1,
                     uint8* dst,
                     int dst_stride,
                     int width,
                     int height,
                     int interpolation) {
  int y;
  void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst = dst + (height - 1) * dst_stride;
    dst_stride = -dst_stride;
  }
  // Coalesce rows: contiguous planes can be processed as one long row.
  if (src_stride0 == width && src_stride1 == width && dst_stride == width) {
    width *= height;
    height = 1;
    src_stride0 = src_stride1 = dst_stride = 0;
  }
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
  // DSPR2 requires pointer and stride alignment on both sources and dest.
  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src0, 4) &&
      IS_ALIGNED(src_stride0, 4) && IS_ALIGNED(src1, 4) &&
      IS_ALIGNED(src_stride1, 4) && IS_ALIGNED(dst, 4) &&
      IS_ALIGNED(dst_stride, 4) && IS_ALIGNED(width, 4)) {
    InterpolateRow = InterpolateRow_DSPR2;
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // InterpolateRow takes the second source as an offset from the first.
    InterpolateRow(dst, src0, src1 - src0, width, interpolation);
    src0 += src_stride0;
    src1 += src_stride1;
    dst += dst_stride;
  }
  return 0;
}

// Interpolate 2 ARGB images by specified amount (0 to 255).
LIBYUV_API
int ARGBInterpolate(const uint8* src_argb0,
                    int src_stride_argb0,
                    const uint8* src_argb1,
                    int src_stride_argb1,
                    uint8* dst_argb,
                    int dst_stride_argb,
                    int width,
                    int height,
                    int interpolation) {
  // ARGB is 4 bytes per pixel, so blend as a plane of width * 4 bytes.
  return InterpolatePlane(src_argb0, src_stride_argb0, src_argb1,
                          src_stride_argb1, dst_argb, dst_stride_argb,
                          width * 4, height, interpolation);
}

// Interpolate 2 YUV images by specified amount (0 to 255).
LIBYUV_API
int I420Interpolate(const uint8* src0_y,
                    int src0_stride_y,
                    const uint8* src0_u,
                    int src0_stride_u,
                    const uint8* src0_v,
                    int src0_stride_v,
                    const uint8* src1_y,
                    int src1_stride_y,
                    const uint8* src1_u,
                    int src1_stride_u,
                    const uint8* src1_v,
                    int src1_stride_v,
                    uint8* dst_y,
                    int dst_stride_y,
                    uint8* dst_u,
                    int dst_stride_u,
                    uint8* dst_v,
                    int dst_stride_v,
                    int width,
                    int height,
                    int interpolation) {
  // I420 chroma planes are half size in each dimension (rounded up).
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if (!src0_y || !src0_u || !src0_v || !src1_y || !src1_u || !src1_v ||
      !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
  }
  // NOTE(review): per-plane return values are not checked; arguments were
  // validated above, so the calls are expected to succeed.
  InterpolatePlane(src0_y, src0_stride_y, src1_y, src1_stride_y, dst_y,
                   dst_stride_y, width, height, interpolation);
  InterpolatePlane(src0_u, src0_stride_u, src1_u, src1_stride_u, dst_u,
                   dst_stride_u, halfwidth, halfheight, interpolation);
  InterpolatePlane(src0_v, src0_stride_v, src1_v, src1_stride_v, dst_v,
                   dst_stride_v, halfwidth, halfheight, interpolation);
  return 0;
}

// Shuffle ARGB channel order. e.g. BGRA to ARGB.
// `shuffler` selects the byte reordering (passed through to the row function).
LIBYUV_API
int ARGBShuffle(const uint8* src_bgra,
                int src_stride_bgra,
                uint8* dst_argb,
                int dst_stride_argb,
                const uint8* shuffler,
                int width,
                int height) {
  int y;
  void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
                         const uint8* shuffler, int width) = ARGBShuffleRow_C;
  if (!src_bgra || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_bgra = src_bgra + (height - 1) * src_stride_bgra;
    src_stride_bgra = -src_stride_bgra;
  }
  // Coalesce rows.
  if (src_stride_bgra == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_bgra = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBSHUFFLEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBShuffleRow = ARGBShuffleRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      ARGBShuffleRow = ARGBShuffleRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      ARGBShuffleRow = ARGBShuffleRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
    if (IS_ALIGNED(width, 4)) {
      ARGBShuffleRow = ARGBShuffleRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBShuffleRow = ARGBShuffleRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
    src_bgra += src_stride_bgra;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Sobel ARGB effect.
// Shared driver: converts ARGB to gray (YJ), computes X and Y gradients over
// a rolling window of 3 gray rows, then lets `SobelRow` format the output.
static int ARGBSobelize(const uint8* src_argb,
                        int src_stride_argb,
                        uint8* dst_argb,
                        int dst_stride_argb,
                        int width,
                        int height,
                        void (*SobelRow)(const uint8* src_sobelx,
                                         const uint8* src_sobely,
                                         uint8* dst,
                                         int width)) {
  int y;
  void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int width) =
      ARGBToYJRow_C;
  void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1, uint8* dst_sobely,
                    int width) = SobelYRow_C;
  void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
                    const uint8* src_y2, uint8* dst_sobely, int width) =
      SobelXRow_C;
  const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }

#if defined(HAS_ARGBTOYJROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYJRow = ARGBToYJRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      ARGBToYJRow = ARGBToYJRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToYJRow = ARGBToYJRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBToYJRow = ARGBToYJRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBToYJRow = ARGBToYJRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYJRow = ARGBToYJRow_MSA;
    }
  }
#endif

#if defined(HAS_SOBELYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelYRow = SobelYRow_SSE2;
  }
#endif
#if defined(HAS_SOBELYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelYRow = SobelYRow_NEON;
  }
#endif
#if defined(HAS_SOBELXROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelXRow = SobelXRow_SSE2;
  }
#endif
#if defined(HAS_SOBELXROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelXRow = SobelXRow_NEON;
  }
#endif
  {
    // 3 rows with edges before/after.
    const int kRowSize = (width + kEdge + 31) & ~31;  // Round up to 32.
    align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
    uint8* row_sobelx = rows;
    uint8* row_sobely = rows + kRowSize;
    uint8* row_y = rows + kRowSize * 2;

    // Convert first row.
    uint8* row_y0 = row_y + kEdge;
    uint8* row_y1 = row_y0 + kRowSize;
    uint8* row_y2 = row_y1 + kRowSize;
    // First source row is used for both y0 and y1 (top edge replication).
    ARGBToYJRow(src_argb, row_y0, width);
    row_y0[-1] = row_y0[0];  // Extrude left edge by one pixel.
    memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
    ARGBToYJRow(src_argb, row_y1, width);
    row_y1[-1] = row_y1[0];
    memset(row_y1 + width, row_y1[width - 1], 16);
    memset(row_y2 + width, 0, 16);

    for (y = 0; y < height; ++y) {
      // Convert next row of ARGB to G.
      if (y < (height - 1)) {
        src_argb += src_stride_argb;
      }
      ARGBToYJRow(src_argb, row_y2, width);
      row_y2[-1] = row_y2[0];
      row_y2[width] = row_y2[width - 1];

      // Gradients read one pixel left of each row (the extruded edge).
      SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
      SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
      SobelRow(row_sobelx, row_sobely, dst_argb, width);

      // Cycle thru circular queue of 3 row_y buffers.
      {
        uint8* row_yt = row_y0;
        row_y0 = row_y1;
        row_y1 = row_y2;
        row_y2 = row_yt;
      }

      dst_argb += dst_stride_argb;
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}

// Sobel ARGB effect.
2681 LIBYUV_API 2682 int ARGBSobel(const uint8* src_argb, 2683 int src_stride_argb, 2684 uint8* dst_argb, 2685 int dst_stride_argb, 2686 int width, 2687 int height) { 2688 void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely, 2689 uint8* dst_argb, int width) = SobelRow_C; 2690 #if defined(HAS_SOBELROW_SSE2) 2691 if (TestCpuFlag(kCpuHasSSE2)) { 2692 SobelRow = SobelRow_Any_SSE2; 2693 if (IS_ALIGNED(width, 16)) { 2694 SobelRow = SobelRow_SSE2; 2695 } 2696 } 2697 #endif 2698 #if defined(HAS_SOBELROW_NEON) 2699 if (TestCpuFlag(kCpuHasNEON)) { 2700 SobelRow = SobelRow_Any_NEON; 2701 if (IS_ALIGNED(width, 8)) { 2702 SobelRow = SobelRow_NEON; 2703 } 2704 } 2705 #endif 2706 #if defined(HAS_SOBELROW_MSA) 2707 if (TestCpuFlag(kCpuHasMSA)) { 2708 SobelRow = SobelRow_Any_MSA; 2709 if (IS_ALIGNED(width, 16)) { 2710 SobelRow = SobelRow_MSA; 2711 } 2712 } 2713 #endif 2714 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, 2715 width, height, SobelRow); 2716 } 2717 2718 // Sobel ARGB effect with planar output. 
2719 LIBYUV_API 2720 int ARGBSobelToPlane(const uint8* src_argb, 2721 int src_stride_argb, 2722 uint8* dst_y, 2723 int dst_stride_y, 2724 int width, 2725 int height) { 2726 void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely, 2727 uint8* dst_, int width) = SobelToPlaneRow_C; 2728 #if defined(HAS_SOBELTOPLANEROW_SSE2) 2729 if (TestCpuFlag(kCpuHasSSE2)) { 2730 SobelToPlaneRow = SobelToPlaneRow_Any_SSE2; 2731 if (IS_ALIGNED(width, 16)) { 2732 SobelToPlaneRow = SobelToPlaneRow_SSE2; 2733 } 2734 } 2735 #endif 2736 #if defined(HAS_SOBELTOPLANEROW_NEON) 2737 if (TestCpuFlag(kCpuHasNEON)) { 2738 SobelToPlaneRow = SobelToPlaneRow_Any_NEON; 2739 if (IS_ALIGNED(width, 16)) { 2740 SobelToPlaneRow = SobelToPlaneRow_NEON; 2741 } 2742 } 2743 #endif 2744 #if defined(HAS_SOBELTOPLANEROW_MSA) 2745 if (TestCpuFlag(kCpuHasMSA)) { 2746 SobelToPlaneRow = SobelToPlaneRow_Any_MSA; 2747 if (IS_ALIGNED(width, 32)) { 2748 SobelToPlaneRow = SobelToPlaneRow_MSA; 2749 } 2750 } 2751 #endif 2752 return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width, 2753 height, SobelToPlaneRow); 2754 } 2755 2756 // SobelXY ARGB effect. 2757 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel. 
LIBYUV_API
int ARGBSobelXY(const uint8* src_argb,
                int src_stride_argb,
                uint8* dst_argb,
                int dst_stride_argb,
                int width,
                int height) {
  // Select the fastest supported XY output row function.
  void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
                     uint8* dst_argb, int width) = SobelXYRow_C;
#if defined(HAS_SOBELXYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelXYRow = SobelXYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SobelXYRow = SobelXYRow_SSE2;
    }
  }
#endif
#if defined(HAS_SOBELXYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelXYRow = SobelXYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      SobelXYRow = SobelXYRow_NEON;
    }
  }
#endif
#if defined(HAS_SOBELXYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SobelXYRow = SobelXYRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      SobelXYRow = SobelXYRow_MSA;
    }
  }
#endif
  return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
                      width, height, SobelXYRow);
}

// Apply a 4x4 polynomial to each ARGB pixel.
// `poly` holds the coefficients (passed through to the row function).
LIBYUV_API
int ARGBPolynomial(const uint8* src_argb,
                   int src_stride_argb,
                   uint8* dst_argb,
                   int dst_stride_argb,
                   const float* poly,
                   int width,
                   int height) {
  int y;
  void (*ARGBPolynomialRow)(const uint8* src_argb, uint8* dst_argb,
                            const float* poly, int width) = ARGBPolynomialRow_C;
  if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
    ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
  }
#endif
#if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
  // AVX2 version also requires fused multiply-add support.
  if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
      IS_ALIGNED(width, 2)) {
    ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBPolynomialRow(src_argb, dst_argb, poly, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Convert plane of 16 bit shorts to half floats.
// Source values are multiplied by scale before storing as half float.
LIBYUV_API
int HalfFloatPlane(const uint16* src_y,
                   int src_stride_y,
                   uint16* dst_y,
                   int dst_stride_y,
                   float scale,
                   int width,
                   int height) {
  int y;
  void (*HalfFloatRow)(const uint16* src, uint16* dst, float scale, int width) =
      HalfFloatRow_C;
  if (!src_y || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Strides arrive in bytes; convert to uint16 element units for the
  // pointer arithmetic below.
  src_stride_y >>= 1;
  dst_stride_y >>= 1;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  // Coalesce rows.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
#if defined(HAS_HALFFLOATROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    HalfFloatRow = HalfFloatRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      HalfFloatRow = HalfFloatRow_SSE2;
    }
  }
#endif
#if defined(HAS_HALFFLOATROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    HalfFloatRow = HalfFloatRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      HalfFloatRow = HalfFloatRow_AVX2;
    }
  }
#endif
#if defined(HAS_HALFFLOATROW_F16C)
  // F16C path has a dedicated variant for the common scale == 1 case.
  if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasF16C)) {
    HalfFloatRow =
        (scale == 1.0f) ? HalfFloat1Row_Any_F16C : HalfFloatRow_Any_F16C;
    if (IS_ALIGNED(width, 16)) {
      HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_F16C : HalfFloatRow_F16C;
    }
  }
#endif
#if defined(HAS_HALFFLOATROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    HalfFloatRow =
        (scale == 1.0f) ? HalfFloat1Row_Any_NEON : HalfFloatRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_NEON : HalfFloatRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    HalfFloatRow(src_y, dst_y, scale, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
  return 0;
}

// Apply a lumacolortable to each ARGB pixel.
// `luma` is the lookup table; interpretation is up to the row function.
LIBYUV_API
int ARGBLumaColorTable(const uint8* src_argb,
                       int src_stride_argb,
                       uint8* dst_argb,
                       int dst_stride_argb,
                       const uint8* luma,
                       int width,
                       int height) {
  int y;
  void (*ARGBLumaColorTableRow)(
      const uint8* src_argb, uint8* dst_argb, int width, const uint8* luma,
      const uint32 lumacoeff) = ARGBLumaColorTableRow_C;
  if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
    ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
  }
#endif

  for (y = 0; y < height; ++y) {
    // 0x00264b0f: packed luma coefficients — presumably per-channel weights
    // consumed by the row function; see row implementation to confirm.
    ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Copy Alpha from one ARGB image to another.
LIBYUV_API
int ARGBCopyAlpha(const uint8* src_argb,
                  int src_stride_argb,
                  uint8* dst_argb,
                  int dst_stride_argb,
                  int width,
                  int height) {
  int y;
  void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
      ARGBCopyAlphaRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBCOPYALPHAROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBCOPYALPHAROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBCopyAlphaRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Extract just the alpha channel from ARGB.
LIBYUV_API
int ARGBExtractAlpha(const uint8* src_argb,
                     int src_stride,
                     uint8* dst_a,
                     int dst_stride,
                     int width,
                     int height) {
  if (!src_argb || !dst_a || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb += (height - 1) * src_stride;
    src_stride = -src_stride;
  }
  // Coalesce rows.
  if (src_stride == width * 4 && dst_stride == width) {
    width *= height;
    height = 1;
    src_stride = dst_stride = 0;
  }
  void (*ARGBExtractAlphaRow)(const uint8* src_argb, uint8* dst_a, int width) =
      ARGBExtractAlphaRow_C;
#if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
                                               : ARGBExtractAlphaRow_Any_SSE2;
  }
#endif
#if defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2
                                                : ARGBExtractAlphaRow_Any_AVX2;
  }
#endif
#if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
                                                : ARGBExtractAlphaRow_Any_NEON;
  }
#endif

  for (int y = 0; y < height; ++y) {
    ARGBExtractAlphaRow(src_argb, dst_a, width);
    src_argb += src_stride;
    dst_a += dst_stride;
  }
  return 0;
}

// Copy a planar Y channel to the alpha channel of a destination ARGB image.
LIBYUV_API
int ARGBCopyYToAlpha(const uint8* src_y,
                     int src_stride_y,
                     uint8* dst_argb,
                     int dst_stride_argb,
                     int width,
                     int height) {
  int y;
  void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
      ARGBCopyYToAlphaRow_C;
  if (!src_y || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  // Coalesce rows.
  if (src_stride_y == width && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBCopyYToAlphaRow(src_y, dst_argb, width);
    src_y += src_stride_y;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// TODO(fbarchard): Consider if width is even Y channel can be split
// directly. A SplitUVRow_Odd function could copy the remaining chroma.

// Convert YUY2 (packed Y0 U Y1 V) to NV12: deinterleave Y, and reduce the
// two chroma rows of each source row pair to one UV row via InterpolateRow
// with fraction 128 (halfway blend).
LIBYUV_API
int YUY2ToNV12(const uint8* src_yuy2,
               int src_stride_yuy2,
               uint8* dst_y,
               int dst_stride_y,
               uint8* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int y;
  int halfwidth = (width + 1) >> 1;
  void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                     int width) = SplitUVRow_C;
  void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    src_stride_yuy2 = -src_stride_yuy2;
  }
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  {
    int awidth = halfwidth * 2;
    // row of y and 2 rows of uv
    align_buffer_64(rows, awidth * 3);

    for (y = 0; y < height - 1; y += 2) {
      // Split Y from UV.
      // YUY2 treated as UV-pairs: first half of each pair is Y bytes.
      SplitUVRow(src_yuy2, rows, rows + awidth, awidth);
      memcpy(dst_y, rows, width);
      SplitUVRow(src_yuy2 + src_stride_yuy2, rows, rows + awidth * 2, awidth);
      memcpy(dst_y + dst_stride_y, rows, width);
      // Average the two UV rows into one output UV row (fraction 128).
      InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
      src_yuy2 += src_stride_yuy2 * 2;
      dst_y += dst_stride_y * 2;
      dst_uv += dst_stride_uv;
    }
    if (height & 1) {
      // Split Y from UV.
      // Odd final row: its UV goes straight to the destination.
      SplitUVRow(src_yuy2, rows, dst_uv, awidth);
      memcpy(dst_y, rows, width);
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}

// Convert UYVY (packed U Y0 V Y1) to NV12; same scheme as YUY2ToNV12 but the
// Y bytes occupy the second half of each split pair.
LIBYUV_API
int UYVYToNV12(const uint8* src_uyvy,
               int src_stride_uyvy,
               uint8* dst_y,
               int dst_stride_y,
               uint8* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int y;
  int halfwidth = (width + 1) >> 1;
  void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                     int width) = SplitUVRow_C;
  void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    src_stride_uyvy = -src_stride_uyvy;
  }
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  {
    int awidth = halfwidth * 2;
    // row of y and 2 rows of uv
    align_buffer_64(rows, awidth * 3);

    for (y = 0; y < height - 1; y += 2) {
      // Split Y from UV.
      // UYVY: Y is the second half of each split pair (note swapped args
      // relative to YUY2ToNV12).
      SplitUVRow(src_uyvy, rows + awidth, rows, awidth);
      memcpy(dst_y, rows, width);
      SplitUVRow(src_uyvy + src_stride_uyvy, rows + awidth * 2, rows, awidth);
      memcpy(dst_y + dst_stride_y, rows, width);
      // Average the two UV rows into one output UV row (fraction 128).
      InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
      src_uyvy += src_stride_uyvy * 2;
      dst_y += dst_stride_y * 2;
      dst_uv += dst_stride_uv;
    }
    if (height & 1) {
      // Split Y from UV.
      // Odd final row: its UV goes straight to the destination.
      SplitUVRow(src_uyvy, dst_uv, rows, awidth);
      memcpy(dst_y, rows, width);
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif