1 /* 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "libyuv/planar_functions.h" 12 13 #include <string.h> // for memset() 14 15 #include "libyuv/cpu_id.h" 16 #ifdef HAVE_JPEG 17 #include "libyuv/mjpeg_decoder.h" 18 #endif 19 #include "libyuv/row.h" 20 21 #ifdef __cplusplus 22 namespace libyuv { 23 extern "C" { 24 #endif 25 26 // Copy a plane of data 27 LIBYUV_API 28 void CopyPlane(const uint8* src_y, int src_stride_y, 29 uint8* dst_y, int dst_stride_y, 30 int width, int height) { 31 int y; 32 void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; 33 // Coalesce rows. 34 if (src_stride_y == width && 35 dst_stride_y == width) { 36 width *= height; 37 height = 1; 38 src_stride_y = dst_stride_y = 0; 39 } 40 // Nothing to do. 41 if (src_y == dst_y && src_stride_y == dst_stride_y) { 42 return; 43 } 44 #if defined(HAS_COPYROW_X86) 45 if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { 46 CopyRow = CopyRow_X86; 47 } 48 #endif 49 #if defined(HAS_COPYROW_SSE2) 50 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && 51 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && 52 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { 53 CopyRow = CopyRow_SSE2; 54 } 55 #endif 56 #if defined(HAS_COPYROW_ERMS) 57 if (TestCpuFlag(kCpuHasERMS)) { 58 CopyRow = CopyRow_ERMS; 59 } 60 #endif 61 #if defined(HAS_COPYROW_NEON) 62 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { 63 CopyRow = CopyRow_NEON; 64 } 65 #endif 66 #if defined(HAS_COPYROW_MIPS) 67 if (TestCpuFlag(kCpuHasMIPS)) { 68 CopyRow = CopyRow_MIPS; 69 } 70 #endif 71 72 // Copy plane 73 for (y = 0; y < height; ++y) { 74 CopyRow(src_y, dst_y, width); 75 src_y += src_stride_y; 76 dst_y += dst_stride_y; 77 } 78 } 79 80 LIBYUV_API 81 void CopyPlane_16(const uint16* src_y, int src_stride_y, 82 uint16* dst_y, int dst_stride_y, 83 int width, int height) { 84 int y; 85 void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C; 86 // Coalesce rows. 87 if (src_stride_y == width && 88 dst_stride_y == width) { 89 width *= height; 90 height = 1; 91 src_stride_y = dst_stride_y = 0; 92 } 93 #if defined(HAS_COPYROW_16_X86) 94 if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { 95 CopyRow = CopyRow_16_X86; 96 } 97 #endif 98 #if defined(HAS_COPYROW_16_SSE2) 99 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && 100 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && 101 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { 102 CopyRow = CopyRow_16_SSE2; 103 } 104 #endif 105 #if defined(HAS_COPYROW_16_ERMS) 106 if (TestCpuFlag(kCpuHasERMS)) { 107 CopyRow = CopyRow_16_ERMS; 108 } 109 #endif 110 #if defined(HAS_COPYROW_16_NEON) 111 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { 112 CopyRow = CopyRow_16_NEON; 113 } 114 #endif 115 #if defined(HAS_COPYROW_16_MIPS) 116 if (TestCpuFlag(kCpuHasMIPS)) { 117 CopyRow = CopyRow_16_MIPS; 118 } 119 #endif 120 121 // Copy plane 122 for (y = 0; y < height; ++y) { 123 CopyRow(src_y, dst_y, width); 124 src_y += src_stride_y; 125 dst_y += dst_stride_y; 126 } 127 } 128 129 // Copy I422. 130 LIBYUV_API 131 int I422Copy(const uint8* src_y, int src_stride_y, 132 const uint8* src_u, int src_stride_u, 133 const uint8* src_v, int src_stride_v, 134 uint8* dst_y, int dst_stride_y, 135 uint8* dst_u, int dst_stride_u, 136 uint8* dst_v, int dst_stride_v, 137 int width, int height) { 138 int halfwidth = (width + 1) >> 1; 139 if (!src_y || !src_u || !src_v || 140 !dst_y || !dst_u || !dst_v || 141 width <= 0 || height == 0) { 142 return -1; 143 } 144 // Negative height means invert the image. 145 if (height < 0) { 146 height = -height; 147 src_y = src_y + (height - 1) * src_stride_y; 148 src_u = src_u + (height - 1) * src_stride_u; 149 src_v = src_v + (height - 1) * src_stride_v; 150 src_stride_y = -src_stride_y; 151 src_stride_u = -src_stride_u; 152 src_stride_v = -src_stride_v; 153 } 154 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); 155 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height); 156 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height); 157 return 0; 158 } 159 160 // Copy I444. 161 LIBYUV_API 162 int I444Copy(const uint8* src_y, int src_stride_y, 163 const uint8* src_u, int src_stride_u, 164 const uint8* src_v, int src_stride_v, 165 uint8* dst_y, int dst_stride_y, 166 uint8* dst_u, int dst_stride_u, 167 uint8* dst_v, int dst_stride_v, 168 int width, int height) { 169 if (!src_y || !src_u || !src_v || 170 !dst_y || !dst_u || !dst_v || 171 width <= 0 || height == 0) { 172 return -1; 173 } 174 // Negative height means invert the image. 175 if (height < 0) { 176 height = -height; 177 src_y = src_y + (height - 1) * src_stride_y; 178 src_u = src_u + (height - 1) * src_stride_u; 179 src_v = src_v + (height - 1) * src_stride_v; 180 src_stride_y = -src_stride_y; 181 src_stride_u = -src_stride_u; 182 src_stride_v = -src_stride_v; 183 } 184 185 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); 186 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height); 187 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height); 188 return 0; 189 } 190 191 // Copy I400. 192 LIBYUV_API 193 int I400ToI400(const uint8* src_y, int src_stride_y, 194 uint8* dst_y, int dst_stride_y, 195 int width, int height) { 196 if (!src_y || !dst_y || width <= 0 || height == 0) { 197 return -1; 198 } 199 // Negative height means invert the image. 200 if (height < 0) { 201 height = -height; 202 src_y = src_y + (height - 1) * src_stride_y; 203 src_stride_y = -src_stride_y; 204 } 205 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); 206 return 0; 207 } 208 209 // Convert I420 to I400. 210 LIBYUV_API 211 int I420ToI400(const uint8* src_y, int src_stride_y, 212 const uint8* src_u, int src_stride_u, 213 const uint8* src_v, int src_stride_v, 214 uint8* dst_y, int dst_stride_y, 215 int width, int height) { 216 if (!src_y || !dst_y || width <= 0 || height == 0) { 217 return -1; 218 } 219 // Negative height means invert the image. 220 if (height < 0) { 221 height = -height; 222 src_y = src_y + (height - 1) * src_stride_y; 223 src_stride_y = -src_stride_y; 224 } 225 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); 226 return 0; 227 } 228 229 // Mirror a plane of data. 230 void MirrorPlane(const uint8* src_y, int src_stride_y, 231 uint8* dst_y, int dst_stride_y, 232 int width, int height) { 233 int y; 234 void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C; 235 // Negative height means invert the image. 236 if (height < 0) { 237 height = -height; 238 src_y = src_y + (height - 1) * src_stride_y; 239 src_stride_y = -src_stride_y; 240 } 241 #if defined(HAS_MIRRORROW_NEON) 242 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { 243 MirrorRow = MirrorRow_NEON; 244 } 245 #endif 246 #if defined(HAS_MIRRORROW_SSE2) 247 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) { 248 MirrorRow = MirrorRow_SSE2; 249 } 250 #endif 251 #if defined(HAS_MIRRORROW_SSSE3) 252 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) && 253 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && 254 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { 255 MirrorRow = MirrorRow_SSSE3; 256 } 257 #endif 258 #if defined(HAS_MIRRORROW_AVX2) 259 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) { 260 MirrorRow = MirrorRow_AVX2; 261 } 262 #endif 263 264 // Mirror plane 265 for (y = 0; y < height; ++y) { 266 MirrorRow(src_y, dst_y, width); 267 src_y += src_stride_y; 268 dst_y += dst_stride_y; 269 } 270 } 271 272 // Convert YUY2 to I422. 273 LIBYUV_API 274 int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2, 275 uint8* dst_y, int dst_stride_y, 276 uint8* dst_u, int dst_stride_u, 277 uint8* dst_v, int dst_stride_v, 278 int width, int height) { 279 int y; 280 void (*YUY2ToUV422Row)(const uint8* src_yuy2, 281 uint8* dst_u, uint8* dst_v, int pix) = 282 YUY2ToUV422Row_C; 283 void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) = 284 YUY2ToYRow_C; 285 // Negative height means invert the image. 286 if (height < 0) { 287 height = -height; 288 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; 289 src_stride_yuy2 = -src_stride_yuy2; 290 } 291 // Coalesce rows. 292 if (src_stride_yuy2 == width * 2 && 293 dst_stride_y == width && 294 dst_stride_u * 2 == width && 295 dst_stride_v * 2 == width) { 296 width *= height; 297 height = 1; 298 src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0; 299 } 300 #if defined(HAS_YUY2TOYROW_SSE2) 301 if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { 302 YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2; 303 YUY2ToYRow = YUY2ToYRow_Any_SSE2; 304 if (IS_ALIGNED(width, 16)) { 305 YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2; 306 YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2; 307 if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) { 308 YUY2ToUV422Row = YUY2ToUV422Row_SSE2; 309 if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { 310 YUY2ToYRow = YUY2ToYRow_SSE2; 311 } 312 } 313 } 314 } 315 #endif 316 #if defined(HAS_YUY2TOYROW_AVX2) 317 if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { 318 YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2; 319 YUY2ToYRow = YUY2ToYRow_Any_AVX2; 320 if (IS_ALIGNED(width, 32)) { 321 YUY2ToUV422Row = YUY2ToUV422Row_AVX2; 322 YUY2ToYRow = YUY2ToYRow_AVX2; 323 } 324 } 325 #endif 326 #if defined(HAS_YUY2TOYROW_NEON) 327 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 328 YUY2ToYRow = YUY2ToYRow_Any_NEON; 329 if (width >= 16) { 330 YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON; 331 } 332 if (IS_ALIGNED(width, 16)) { 333 YUY2ToYRow = YUY2ToYRow_NEON; 334 YUY2ToUV422Row = YUY2ToUV422Row_NEON; 335 } 336 } 337 #endif 338 339 for (y = 0; y < height; ++y) { 340 YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width); 341 YUY2ToYRow(src_yuy2, dst_y, width); 342 src_yuy2 += src_stride_yuy2; 343 dst_y += dst_stride_y; 344 dst_u += dst_stride_u; 345 dst_v += dst_stride_v; 346 } 347 return 0; 348 } 349 350 // Convert UYVY to I422. 351 LIBYUV_API 352 int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy, 353 uint8* dst_y, int dst_stride_y, 354 uint8* dst_u, int dst_stride_u, 355 uint8* dst_v, int dst_stride_v, 356 int width, int height) { 357 int y; 358 void (*UYVYToUV422Row)(const uint8* src_uyvy, 359 uint8* dst_u, uint8* dst_v, int pix) = 360 UYVYToUV422Row_C; 361 void (*UYVYToYRow)(const uint8* src_uyvy, 362 uint8* dst_y, int pix) = UYVYToYRow_C; 363 // Negative height means invert the image. 364 if (height < 0) { 365 height = -height; 366 src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; 367 src_stride_uyvy = -src_stride_uyvy; 368 } 369 // Coalesce rows. 370 if (src_stride_uyvy == width * 2 && 371 dst_stride_y == width && 372 dst_stride_u * 2 == width && 373 dst_stride_v * 2 == width) { 374 width *= height; 375 height = 1; 376 src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0; 377 } 378 #if defined(HAS_UYVYTOYROW_SSE2) 379 if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { 380 UYVYToUV422Row = UYVYToUV422Row_Any_SSE2; 381 UYVYToYRow = UYVYToYRow_Any_SSE2; 382 if (IS_ALIGNED(width, 16)) { 383 UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2; 384 UYVYToYRow = UYVYToYRow_Unaligned_SSE2; 385 if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) { 386 UYVYToUV422Row = UYVYToUV422Row_SSE2; 387 if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { 388 UYVYToYRow = UYVYToYRow_SSE2; 389 } 390 } 391 } 392 } 393 #endif 394 #if defined(HAS_UYVYTOYROW_AVX2) 395 if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { 396 UYVYToUV422Row = UYVYToUV422Row_Any_AVX2; 397 UYVYToYRow = UYVYToYRow_Any_AVX2; 398 if (IS_ALIGNED(width, 32)) { 399 UYVYToUV422Row = UYVYToUV422Row_AVX2; 400 UYVYToYRow = UYVYToYRow_AVX2; 401 } 402 } 403 #endif 404 #if defined(HAS_UYVYTOYROW_NEON) 405 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 406 UYVYToYRow = UYVYToYRow_Any_NEON; 407 if (width >= 16) { 408 UYVYToUV422Row = UYVYToUV422Row_Any_NEON; 409 } 410 if (IS_ALIGNED(width, 16)) { 411 UYVYToYRow = UYVYToYRow_NEON; 412 UYVYToUV422Row = UYVYToUV422Row_NEON; 413 } 414 } 415 #endif 416 417 for (y = 0; y < height; ++y) { 418 UYVYToUV422Row(src_uyvy, dst_u, dst_v, width); 419 UYVYToYRow(src_uyvy, dst_y, width); 420 src_uyvy += src_stride_uyvy; 421 dst_y += dst_stride_y; 422 dst_u += dst_stride_u; 423 dst_v += dst_stride_v; 424 } 425 return 0; 426 } 427 428 // Mirror I400 with optional flipping 429 LIBYUV_API 430 int I400Mirror(const uint8* src_y, int src_stride_y, 431 uint8* dst_y, int dst_stride_y, 432 int width, int height) { 433 if (!src_y || !dst_y || 434 width <= 0 || height == 0) { 435 return -1; 436 } 437 // Negative height means invert the image. 438 if (height < 0) { 439 height = -height; 440 src_y = src_y + (height - 1) * src_stride_y; 441 src_stride_y = -src_stride_y; 442 } 443 444 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); 445 return 0; 446 } 447 448 // Mirror I420 with optional flipping 449 LIBYUV_API 450 int I420Mirror(const uint8* src_y, int src_stride_y, 451 const uint8* src_u, int src_stride_u, 452 const uint8* src_v, int src_stride_v, 453 uint8* dst_y, int dst_stride_y, 454 uint8* dst_u, int dst_stride_u, 455 uint8* dst_v, int dst_stride_v, 456 int width, int height) { 457 int halfwidth = (width + 1) >> 1; 458 int halfheight = (height + 1) >> 1; 459 if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v || 460 width <= 0 || height == 0) { 461 return -1; 462 } 463 // Negative height means invert the image. 464 if (height < 0) { 465 height = -height; 466 halfheight = (height + 1) >> 1; 467 src_y = src_y + (height - 1) * src_stride_y; 468 src_u = src_u + (halfheight - 1) * src_stride_u; 469 src_v = src_v + (halfheight - 1) * src_stride_v; 470 src_stride_y = -src_stride_y; 471 src_stride_u = -src_stride_u; 472 src_stride_v = -src_stride_v; 473 } 474 475 if (dst_y) { 476 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); 477 } 478 MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); 479 MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); 480 return 0; 481 } 482 483 // ARGB mirror. 484 LIBYUV_API 485 int ARGBMirror(const uint8* src_argb, int src_stride_argb, 486 uint8* dst_argb, int dst_stride_argb, 487 int width, int height) { 488 int y; 489 void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) = 490 ARGBMirrorRow_C; 491 if (!src_argb || !dst_argb || width <= 0 || height == 0) { 492 return -1; 493 } 494 // Negative height means invert the image. 495 if (height < 0) { 496 height = -height; 497 src_argb = src_argb + (height - 1) * src_stride_argb; 498 src_stride_argb = -src_stride_argb; 499 } 500 501 #if defined(HAS_ARGBMIRRORROW_SSSE3) 502 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) && 503 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && 504 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 505 ARGBMirrorRow = ARGBMirrorRow_SSSE3; 506 } 507 #endif 508 #if defined(HAS_ARGBMIRRORROW_AVX2) 509 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) { 510 ARGBMirrorRow = ARGBMirrorRow_AVX2; 511 } 512 #endif 513 #if defined(HAS_ARGBMIRRORROW_NEON) 514 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) { 515 ARGBMirrorRow = ARGBMirrorRow_NEON; 516 } 517 #endif 518 519 // Mirror plane 520 for (y = 0; y < height; ++y) { 521 ARGBMirrorRow(src_argb, dst_argb, width); 522 src_argb += src_stride_argb; 523 dst_argb += dst_stride_argb; 524 } 525 return 0; 526 } 527 528 // Get a blender that optimized for the CPU, alignment and pixel count. 529 // As there are 6 blenders to choose from, the caller should try to use 530 // the same blend function for all pixels if possible. 531 LIBYUV_API 532 ARGBBlendRow GetARGBBlend() { 533 void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1, 534 uint8* dst_argb, int width) = ARGBBlendRow_C; 535 #if defined(HAS_ARGBBLENDROW_SSSE3) 536 if (TestCpuFlag(kCpuHasSSSE3)) { 537 ARGBBlendRow = ARGBBlendRow_SSSE3; 538 return ARGBBlendRow; 539 } 540 #endif 541 #if defined(HAS_ARGBBLENDROW_SSE2) 542 if (TestCpuFlag(kCpuHasSSE2)) { 543 ARGBBlendRow = ARGBBlendRow_SSE2; 544 } 545 #endif 546 #if defined(HAS_ARGBBLENDROW_NEON) 547 if (TestCpuFlag(kCpuHasNEON)) { 548 ARGBBlendRow = ARGBBlendRow_NEON; 549 } 550 #endif 551 return ARGBBlendRow; 552 } 553 554 // Alpha Blend 2 ARGB images and store to destination. 555 LIBYUV_API 556 int ARGBBlend(const uint8* src_argb0, int src_stride_argb0, 557 const uint8* src_argb1, int src_stride_argb1, 558 uint8* dst_argb, int dst_stride_argb, 559 int width, int height) { 560 int y; 561 void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1, 562 uint8* dst_argb, int width) = GetARGBBlend(); 563 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { 564 return -1; 565 } 566 // Negative height means invert the image. 567 if (height < 0) { 568 height = -height; 569 dst_argb = dst_argb + (height - 1) * dst_stride_argb; 570 dst_stride_argb = -dst_stride_argb; 571 } 572 // Coalesce rows. 573 if (src_stride_argb0 == width * 4 && 574 src_stride_argb1 == width * 4 && 575 dst_stride_argb == width * 4) { 576 width *= height; 577 height = 1; 578 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; 579 } 580 581 for (y = 0; y < height; ++y) { 582 ARGBBlendRow(src_argb0, src_argb1, dst_argb, width); 583 src_argb0 += src_stride_argb0; 584 src_argb1 += src_stride_argb1; 585 dst_argb += dst_stride_argb; 586 } 587 return 0; 588 } 589 590 // Multiply 2 ARGB images and store to destination. 591 LIBYUV_API 592 int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0, 593 const uint8* src_argb1, int src_stride_argb1, 594 uint8* dst_argb, int dst_stride_argb, 595 int width, int height) { 596 int y; 597 void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst, 598 int width) = ARGBMultiplyRow_C; 599 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { 600 return -1; 601 } 602 // Negative height means invert the image. 603 if (height < 0) { 604 height = -height; 605 dst_argb = dst_argb + (height - 1) * dst_stride_argb; 606 dst_stride_argb = -dst_stride_argb; 607 } 608 // Coalesce rows. 609 if (src_stride_argb0 == width * 4 && 610 src_stride_argb1 == width * 4 && 611 dst_stride_argb == width * 4) { 612 width *= height; 613 height = 1; 614 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; 615 } 616 #if defined(HAS_ARGBMULTIPLYROW_SSE2) 617 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { 618 ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2; 619 if (IS_ALIGNED(width, 4)) { 620 ARGBMultiplyRow = ARGBMultiplyRow_SSE2; 621 } 622 } 623 #endif 624 #if defined(HAS_ARGBMULTIPLYROW_AVX2) 625 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { 626 ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2; 627 if (IS_ALIGNED(width, 8)) { 628 ARGBMultiplyRow = ARGBMultiplyRow_AVX2; 629 } 630 } 631 #endif 632 #if defined(HAS_ARGBMULTIPLYROW_NEON) 633 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 634 ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON; 635 if (IS_ALIGNED(width, 8)) { 636 ARGBMultiplyRow = ARGBMultiplyRow_NEON; 637 } 638 } 639 #endif 640 641 // Multiply plane 642 for (y = 0; y < height; ++y) { 643 ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width); 644 src_argb0 += src_stride_argb0; 645 src_argb1 += src_stride_argb1; 646 dst_argb += dst_stride_argb; 647 } 648 return 0; 649 } 650 651 // Add 2 ARGB images and store to destination. 652 LIBYUV_API 653 int ARGBAdd(const uint8* src_argb0, int src_stride_argb0, 654 const uint8* src_argb1, int src_stride_argb1, 655 uint8* dst_argb, int dst_stride_argb, 656 int width, int height) { 657 int y; 658 void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst, 659 int width) = ARGBAddRow_C; 660 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { 661 return -1; 662 } 663 // Negative height means invert the image. 664 if (height < 0) { 665 height = -height; 666 dst_argb = dst_argb + (height - 1) * dst_stride_argb; 667 dst_stride_argb = -dst_stride_argb; 668 } 669 // Coalesce rows. 670 if (src_stride_argb0 == width * 4 && 671 src_stride_argb1 == width * 4 && 672 dst_stride_argb == width * 4) { 673 width *= height; 674 height = 1; 675 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; 676 } 677 #if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER) 678 if (TestCpuFlag(kCpuHasSSE2)) { 679 ARGBAddRow = ARGBAddRow_SSE2; 680 } 681 #endif 682 #if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER) 683 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { 684 ARGBAddRow = ARGBAddRow_Any_SSE2; 685 if (IS_ALIGNED(width, 4)) { 686 ARGBAddRow = ARGBAddRow_SSE2; 687 } 688 } 689 #endif 690 #if defined(HAS_ARGBADDROW_AVX2) 691 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { 692 ARGBAddRow = ARGBAddRow_Any_AVX2; 693 if (IS_ALIGNED(width, 8)) { 694 ARGBAddRow = ARGBAddRow_AVX2; 695 } 696 } 697 #endif 698 #if defined(HAS_ARGBADDROW_NEON) 699 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 700 ARGBAddRow = ARGBAddRow_Any_NEON; 701 if (IS_ALIGNED(width, 8)) { 702 ARGBAddRow = ARGBAddRow_NEON; 703 } 704 } 705 #endif 706 707 // Add plane 708 for (y = 0; y < height; ++y) { 709 ARGBAddRow(src_argb0, src_argb1, dst_argb, width); 710 src_argb0 += src_stride_argb0; 711 src_argb1 += src_stride_argb1; 712 dst_argb += dst_stride_argb; 713 } 714 return 0; 715 } 716 717 // Subtract 2 ARGB images and store to destination. 718 LIBYUV_API 719 int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0, 720 const uint8* src_argb1, int src_stride_argb1, 721 uint8* dst_argb, int dst_stride_argb, 722 int width, int height) { 723 int y; 724 void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst, 725 int width) = ARGBSubtractRow_C; 726 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { 727 return -1; 728 } 729 // Negative height means invert the image. 730 if (height < 0) { 731 height = -height; 732 dst_argb = dst_argb + (height - 1) * dst_stride_argb; 733 dst_stride_argb = -dst_stride_argb; 734 } 735 // Coalesce rows. 736 if (src_stride_argb0 == width * 4 && 737 src_stride_argb1 == width * 4 && 738 dst_stride_argb == width * 4) { 739 width *= height; 740 height = 1; 741 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; 742 } 743 #if defined(HAS_ARGBSUBTRACTROW_SSE2) 744 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { 745 ARGBSubtractRow = ARGBSubtractRow_Any_SSE2; 746 if (IS_ALIGNED(width, 4)) { 747 ARGBSubtractRow = ARGBSubtractRow_SSE2; 748 } 749 } 750 #endif 751 #if defined(HAS_ARGBSUBTRACTROW_AVX2) 752 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { 753 ARGBSubtractRow = ARGBSubtractRow_Any_AVX2; 754 if (IS_ALIGNED(width, 8)) { 755 ARGBSubtractRow = ARGBSubtractRow_AVX2; 756 } 757 } 758 #endif 759 #if defined(HAS_ARGBSUBTRACTROW_NEON) 760 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 761 ARGBSubtractRow = ARGBSubtractRow_Any_NEON; 762 if (IS_ALIGNED(width, 8)) { 763 ARGBSubtractRow = ARGBSubtractRow_NEON; 764 } 765 } 766 #endif 767 768 // Subtract plane 769 for (y = 0; y < height; ++y) { 770 ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width); 771 src_argb0 += src_stride_argb0; 772 src_argb1 += src_stride_argb1; 773 dst_argb += dst_stride_argb; 774 } 775 return 0; 776 } 777 778 // Convert I422 to BGRA. 779 LIBYUV_API 780 int I422ToBGRA(const uint8* src_y, int src_stride_y, 781 const uint8* src_u, int src_stride_u, 782 const uint8* src_v, int src_stride_v, 783 uint8* dst_bgra, int dst_stride_bgra, 784 int width, int height) { 785 int y; 786 void (*I422ToBGRARow)(const uint8* y_buf, 787 const uint8* u_buf, 788 const uint8* v_buf, 789 uint8* rgb_buf, 790 int width) = I422ToBGRARow_C; 791 if (!src_y || !src_u || !src_v || 792 !dst_bgra || 793 width <= 0 || height == 0) { 794 return -1; 795 } 796 // Negative height means invert the image. 797 if (height < 0) { 798 height = -height; 799 dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra; 800 dst_stride_bgra = -dst_stride_bgra; 801 } 802 // Coalesce rows. 803 if (src_stride_y == width && 804 src_stride_u * 2 == width && 805 src_stride_v * 2 == width && 806 dst_stride_bgra == width * 4) { 807 width *= height; 808 height = 1; 809 src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0; 810 } 811 #if defined(HAS_I422TOBGRAROW_NEON) 812 if (TestCpuFlag(kCpuHasNEON)) { 813 I422ToBGRARow = I422ToBGRARow_Any_NEON; 814 if (IS_ALIGNED(width, 16)) { 815 I422ToBGRARow = I422ToBGRARow_NEON; 816 } 817 } 818 #elif defined(HAS_I422TOBGRAROW_SSSE3) 819 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { 820 I422ToBGRARow = I422ToBGRARow_Any_SSSE3; 821 if (IS_ALIGNED(width, 8)) { 822 I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3; 823 if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) { 824 I422ToBGRARow = I422ToBGRARow_SSSE3; 825 } 826 } 827 } 828 #elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2) 829 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && 830 IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && 831 IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && 832 IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && 833 IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) { 834 I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2; 835 } 836 #endif 837 838 for (y = 0; y < height; ++y) { 839 I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width); 840 dst_bgra += dst_stride_bgra; 841 src_y += src_stride_y; 842 src_u += src_stride_u; 843 src_v += src_stride_v; 844 } 845 return 0; 846 } 847 848 // Convert I422 to ABGR. 849 LIBYUV_API 850 int I422ToABGR(const uint8* src_y, int src_stride_y, 851 const uint8* src_u, int src_stride_u, 852 const uint8* src_v, int src_stride_v, 853 uint8* dst_abgr, int dst_stride_abgr, 854 int width, int height) { 855 int y; 856 void (*I422ToABGRRow)(const uint8* y_buf, 857 const uint8* u_buf, 858 const uint8* v_buf, 859 uint8* rgb_buf, 860 int width) = I422ToABGRRow_C; 861 if (!src_y || !src_u || !src_v || 862 !dst_abgr || 863 width <= 0 || height == 0) { 864 return -1; 865 } 866 // Negative height means invert the image. 867 if (height < 0) { 868 height = -height; 869 dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr; 870 dst_stride_abgr = -dst_stride_abgr; 871 } 872 // Coalesce rows. 873 if (src_stride_y == width && 874 src_stride_u * 2 == width && 875 src_stride_v * 2 == width && 876 dst_stride_abgr == width * 4) { 877 width *= height; 878 height = 1; 879 src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0; 880 } 881 #if defined(HAS_I422TOABGRROW_NEON) 882 if (TestCpuFlag(kCpuHasNEON)) { 883 I422ToABGRRow = I422ToABGRRow_Any_NEON; 884 if (IS_ALIGNED(width, 16)) { 885 I422ToABGRRow = I422ToABGRRow_NEON; 886 } 887 } 888 #elif defined(HAS_I422TOABGRROW_SSSE3) 889 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { 890 I422ToABGRRow = I422ToABGRRow_Any_SSSE3; 891 if (IS_ALIGNED(width, 8)) { 892 I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3; 893 if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) { 894 I422ToABGRRow = I422ToABGRRow_SSSE3; 895 } 896 } 897 } 898 #endif 899 900 for (y = 0; y < height; ++y) { 901 I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); 902 dst_abgr += dst_stride_abgr; 903 src_y += src_stride_y; 904 src_u += src_stride_u; 905 src_v += src_stride_v; 906 } 907 return 0; 908 } 909 910 // Convert I422 to RGBA. 911 LIBYUV_API 912 int I422ToRGBA(const uint8* src_y, int src_stride_y, 913 const uint8* src_u, int src_stride_u, 914 const uint8* src_v, int src_stride_v, 915 uint8* dst_rgba, int dst_stride_rgba, 916 int width, int height) { 917 int y; 918 void (*I422ToRGBARow)(const uint8* y_buf, 919 const uint8* u_buf, 920 const uint8* v_buf, 921 uint8* rgb_buf, 922 int width) = I422ToRGBARow_C; 923 if (!src_y || !src_u || !src_v || 924 !dst_rgba || 925 width <= 0 || height == 0) { 926 return -1; 927 } 928 // Negative height means invert the image. 929 if (height < 0) { 930 height = -height; 931 dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba; 932 dst_stride_rgba = -dst_stride_rgba; 933 } 934 // Coalesce rows. 935 if (src_stride_y == width && 936 src_stride_u * 2 == width && 937 src_stride_v * 2 == width && 938 dst_stride_rgba == width * 4) { 939 width *= height; 940 height = 1; 941 src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0; 942 } 943 #if defined(HAS_I422TORGBAROW_NEON) 944 if (TestCpuFlag(kCpuHasNEON)) { 945 I422ToRGBARow = I422ToRGBARow_Any_NEON; 946 if (IS_ALIGNED(width, 16)) { 947 I422ToRGBARow = I422ToRGBARow_NEON; 948 } 949 } 950 #elif defined(HAS_I422TORGBAROW_SSSE3) 951 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { 952 I422ToRGBARow = I422ToRGBARow_Any_SSSE3; 953 if (IS_ALIGNED(width, 8)) { 954 I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3; 955 if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) { 956 I422ToRGBARow = I422ToRGBARow_SSSE3; 957 } 958 } 959 } 960 #endif 961 962 for (y = 0; y < height; ++y) { 963 I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width); 964 dst_rgba += dst_stride_rgba; 965 src_y += src_stride_y; 966 src_u += src_stride_u; 967 src_v += src_stride_v; 968 } 969 return 0; 970 } 971 972 // Convert NV12 to RGB565. 973 LIBYUV_API 974 int NV12ToRGB565(const uint8* src_y, int src_stride_y, 975 const uint8* src_uv, int src_stride_uv, 976 uint8* dst_rgb565, int dst_stride_rgb565, 977 int width, int height) { 978 int y; 979 void (*NV12ToRGB565Row)(const uint8* y_buf, 980 const uint8* uv_buf, 981 uint8* rgb_buf, 982 int width) = NV12ToRGB565Row_C; 983 if (!src_y || !src_uv || !dst_rgb565 || 984 width <= 0 || height == 0) { 985 return -1; 986 } 987 // Negative height means invert the image. 988 if (height < 0) { 989 height = -height; 990 dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; 991 dst_stride_rgb565 = -dst_stride_rgb565; 992 } 993 #if defined(HAS_NV12TORGB565ROW_SSSE3) 994 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { 995 NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3; 996 if (IS_ALIGNED(width, 8)) { 997 NV12ToRGB565Row = NV12ToRGB565Row_SSSE3; 998 } 999 } 1000 #elif defined(HAS_NV12TORGB565ROW_NEON) 1001 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 1002 NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON; 1003 if (IS_ALIGNED(width, 8)) { 1004 NV12ToRGB565Row = NV12ToRGB565Row_NEON; 1005 } 1006 } 1007 #endif 1008 1009 for (y = 0; y < height; ++y) { 1010 NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width); 1011 dst_rgb565 += dst_stride_rgb565; 1012 src_y += src_stride_y; 1013 if (y & 1) { 1014 src_uv += src_stride_uv; 1015 } 1016 } 1017 return 0; 1018 } 1019 1020 // Convert NV21 to RGB565. 1021 LIBYUV_API 1022 int NV21ToRGB565(const uint8* src_y, int src_stride_y, 1023 const uint8* src_vu, int src_stride_vu, 1024 uint8* dst_rgb565, int dst_stride_rgb565, 1025 int width, int height) { 1026 int y; 1027 void (*NV21ToRGB565Row)(const uint8* y_buf, 1028 const uint8* src_vu, 1029 uint8* rgb_buf, 1030 int width) = NV21ToRGB565Row_C; 1031 if (!src_y || !src_vu || !dst_rgb565 || 1032 width <= 0 || height == 0) { 1033 return -1; 1034 } 1035 // Negative height means invert the image. 1036 if (height < 0) { 1037 height = -height; 1038 dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; 1039 dst_stride_rgb565 = -dst_stride_rgb565; 1040 } 1041 #if defined(HAS_NV21TORGB565ROW_SSSE3) 1042 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { 1043 NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3; 1044 if (IS_ALIGNED(width, 8)) { 1045 NV21ToRGB565Row = NV21ToRGB565Row_SSSE3; 1046 } 1047 } 1048 #elif defined(HAS_NV21TORGB565ROW_NEON) 1049 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 1050 NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON; 1051 if (IS_ALIGNED(width, 8)) { 1052 NV21ToRGB565Row = NV21ToRGB565Row_NEON; 1053 } 1054 } 1055 #endif 1056 1057 for (y = 0; y < height; ++y) { 1058 NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width); 1059 dst_rgb565 += dst_stride_rgb565; 1060 src_y += src_stride_y; 1061 if (y & 1) { 1062 src_vu += src_stride_vu; 1063 } 1064 } 1065 return 0; 1066 } 1067 1068 LIBYUV_API 1069 void SetPlane(uint8* dst_y, int dst_stride_y, 1070 int width, int height, 1071 uint32 value) { 1072 int y; 1073 uint32 v32 = value | (value << 8) | (value << 16) | (value << 24); 1074 void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C; 1075 // Coalesce rows. 1076 if (dst_stride_y == width) { 1077 width *= height; 1078 height = 1; 1079 dst_stride_y = 0; 1080 } 1081 #if defined(HAS_SETROW_NEON) 1082 if (TestCpuFlag(kCpuHasNEON) && 1083 IS_ALIGNED(width, 16) && 1084 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { 1085 SetRow = SetRow_NEON; 1086 } 1087 #endif 1088 #if defined(HAS_SETROW_X86) 1089 if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { 1090 SetRow = SetRow_X86; 1091 } 1092 #endif 1093 1094 // Set plane 1095 for (y = 0; y < height; ++y) { 1096 SetRow(dst_y, v32, width); 1097 dst_y += dst_stride_y; 1098 } 1099 } 1100 1101 // Draw a rectangle into I420 1102 LIBYUV_API 1103 int I420Rect(uint8* dst_y, int dst_stride_y, 1104 uint8* dst_u, int dst_stride_u, 1105 uint8* dst_v, int dst_stride_v, 1106 int x, int y, 1107 int width, int height, 1108 int value_y, int value_u, int value_v) { 1109 int halfwidth = (width + 1) >> 1; 1110 int halfheight = (height + 1) >> 1; 1111 uint8* start_y = dst_y + y * dst_stride_y + x; 1112 uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2); 1113 uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2); 1114 if (!dst_y || !dst_u || !dst_v || 1115 width <= 0 || height <= 0 || 1116 x < 0 || y < 0 || 1117 value_y < 0 || value_y > 255 || 1118 value_u < 0 || value_u > 255 || 1119 value_v < 0 || value_v > 255) { 1120 return -1; 1121 } 1122 1123 SetPlane(start_y, dst_stride_y, width, height, value_y); 1124 SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u); 1125 SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v); 1126 return 0; 1127 } 1128 1129 // Draw a rectangle into ARGB 1130 LIBYUV_API 1131 int ARGBRect(uint8* dst_argb, int dst_stride_argb, 1132 int dst_x, int dst_y, 1133 int width, int height, 1134 uint32 value) { 1135 if (!dst_argb || 1136 width <= 0 || height <= 0 || 1137 dst_x < 0 || dst_y < 0) { 1138 return -1; 1139 } 1140 dst_argb += dst_y * dst_stride_argb + dst_x * 4; 1141 // Coalesce rows. 1142 if (dst_stride_argb == width * 4) { 1143 width *= height; 1144 height = 1; 1145 dst_stride_argb = 0; 1146 } 1147 #if defined(HAS_SETROW_NEON) 1148 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) && 1149 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1150 ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height); 1151 return 0; 1152 } 1153 #endif 1154 #if defined(HAS_SETROW_X86) 1155 if (TestCpuFlag(kCpuHasX86)) { 1156 ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height); 1157 return 0; 1158 } 1159 #endif 1160 ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height); 1161 return 0; 1162 } 1163 1164 // Convert unattentuated ARGB to preattenuated ARGB. 1165 // An unattenutated ARGB alpha blend uses the formula 1166 // p = a * f + (1 - a) * b 1167 // where 1168 // p is output pixel 1169 // f is foreground pixel 1170 // b is background pixel 1171 // a is alpha value from foreground pixel 1172 // An preattenutated ARGB alpha blend uses the formula 1173 // p = f + (1 - a) * b 1174 // where 1175 // f is foreground pixel premultiplied by alpha 1176 1177 LIBYUV_API 1178 int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, 1179 uint8* dst_argb, int dst_stride_argb, 1180 int width, int height) { 1181 int y; 1182 void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb, 1183 int width) = ARGBAttenuateRow_C; 1184 if (!src_argb || !dst_argb || width <= 0 || height == 0) { 1185 return -1; 1186 } 1187 if (height < 0) { 1188 height = -height; 1189 src_argb = src_argb + (height - 1) * src_stride_argb; 1190 src_stride_argb = -src_stride_argb; 1191 } 1192 // Coalesce rows. 1193 if (src_stride_argb == width * 4 && 1194 dst_stride_argb == width * 4) { 1195 width *= height; 1196 height = 1; 1197 src_stride_argb = dst_stride_argb = 0; 1198 } 1199 #if defined(HAS_ARGBATTENUATEROW_SSE2) 1200 if (TestCpuFlag(kCpuHasSSE2) && width >= 4 && 1201 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && 1202 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1203 ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2; 1204 if (IS_ALIGNED(width, 4)) { 1205 ARGBAttenuateRow = ARGBAttenuateRow_SSE2; 1206 } 1207 } 1208 #endif 1209 #if defined(HAS_ARGBATTENUATEROW_SSSE3) 1210 if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) { 1211 ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; 1212 if (IS_ALIGNED(width, 4)) { 1213 ARGBAttenuateRow = ARGBAttenuateRow_SSSE3; 1214 } 1215 } 1216 #endif 1217 #if defined(HAS_ARGBATTENUATEROW_AVX2) 1218 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { 1219 ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2; 1220 if (IS_ALIGNED(width, 8)) { 1221 ARGBAttenuateRow = ARGBAttenuateRow_AVX2; 1222 } 1223 } 1224 #endif 1225 #if defined(HAS_ARGBATTENUATEROW_NEON) 1226 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 1227 ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON; 1228 if (IS_ALIGNED(width, 8)) { 1229 ARGBAttenuateRow = ARGBAttenuateRow_NEON; 1230 } 1231 } 1232 #endif 1233 1234 for (y = 0; y < height; ++y) { 1235 ARGBAttenuateRow(src_argb, dst_argb, width); 1236 src_argb += src_stride_argb; 1237 dst_argb += dst_stride_argb; 1238 } 1239 return 0; 1240 } 1241 1242 // Convert preattentuated ARGB to unattenuated ARGB. 1243 LIBYUV_API 1244 int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, 1245 uint8* dst_argb, int dst_stride_argb, 1246 int width, int height) { 1247 int y; 1248 void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb, 1249 int width) = ARGBUnattenuateRow_C; 1250 if (!src_argb || !dst_argb || width <= 0 || height == 0) { 1251 return -1; 1252 } 1253 if (height < 0) { 1254 height = -height; 1255 src_argb = src_argb + (height - 1) * src_stride_argb; 1256 src_stride_argb = -src_stride_argb; 1257 } 1258 // Coalesce rows. 1259 if (src_stride_argb == width * 4 && 1260 dst_stride_argb == width * 4) { 1261 width *= height; 1262 height = 1; 1263 src_stride_argb = dst_stride_argb = 0; 1264 } 1265 #if defined(HAS_ARGBUNATTENUATEROW_SSE2) 1266 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { 1267 ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2; 1268 if (IS_ALIGNED(width, 4)) { 1269 ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2; 1270 } 1271 } 1272 #endif 1273 #if defined(HAS_ARGBUNATTENUATEROW_AVX2) 1274 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { 1275 ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2; 1276 if (IS_ALIGNED(width, 8)) { 1277 ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2; 1278 } 1279 } 1280 #endif 1281 // TODO(fbarchard): Neon version. 1282 1283 for (y = 0; y < height; ++y) { 1284 ARGBUnattenuateRow(src_argb, dst_argb, width); 1285 src_argb += src_stride_argb; 1286 dst_argb += dst_stride_argb; 1287 } 1288 return 0; 1289 } 1290 1291 // Convert ARGB to Grayed ARGB. 1292 LIBYUV_API 1293 int ARGBGrayTo(const uint8* src_argb, int src_stride_argb, 1294 uint8* dst_argb, int dst_stride_argb, 1295 int width, int height) { 1296 int y; 1297 void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, 1298 int width) = ARGBGrayRow_C; 1299 if (!src_argb || !dst_argb || width <= 0 || height == 0) { 1300 return -1; 1301 } 1302 if (height < 0) { 1303 height = -height; 1304 src_argb = src_argb + (height - 1) * src_stride_argb; 1305 src_stride_argb = -src_stride_argb; 1306 } 1307 // Coalesce rows. 1308 if (src_stride_argb == width * 4 && 1309 dst_stride_argb == width * 4) { 1310 width *= height; 1311 height = 1; 1312 src_stride_argb = dst_stride_argb = 0; 1313 } 1314 #if defined(HAS_ARGBGRAYROW_SSSE3) 1315 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && 1316 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && 1317 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1318 ARGBGrayRow = ARGBGrayRow_SSSE3; 1319 } 1320 #elif defined(HAS_ARGBGRAYROW_NEON) 1321 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1322 ARGBGrayRow = ARGBGrayRow_NEON; 1323 } 1324 #endif 1325 1326 for (y = 0; y < height; ++y) { 1327 ARGBGrayRow(src_argb, dst_argb, width); 1328 src_argb += src_stride_argb; 1329 dst_argb += dst_stride_argb; 1330 } 1331 return 0; 1332 } 1333 1334 // Make a rectangle of ARGB gray scale. 1335 LIBYUV_API 1336 int ARGBGray(uint8* dst_argb, int dst_stride_argb, 1337 int dst_x, int dst_y, 1338 int width, int height) { 1339 int y; 1340 void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, 1341 int width) = ARGBGrayRow_C; 1342 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1343 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { 1344 return -1; 1345 } 1346 // Coalesce rows. 1347 if (dst_stride_argb == width * 4) { 1348 width *= height; 1349 height = 1; 1350 dst_stride_argb = 0; 1351 } 1352 #if defined(HAS_ARGBGRAYROW_SSSE3) 1353 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && 1354 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1355 ARGBGrayRow = ARGBGrayRow_SSSE3; 1356 } 1357 #elif defined(HAS_ARGBGRAYROW_NEON) 1358 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1359 ARGBGrayRow = ARGBGrayRow_NEON; 1360 } 1361 #endif 1362 for (y = 0; y < height; ++y) { 1363 ARGBGrayRow(dst, dst, width); 1364 dst += dst_stride_argb; 1365 } 1366 return 0; 1367 } 1368 1369 // Make a rectangle of ARGB Sepia tone. 1370 LIBYUV_API 1371 int ARGBSepia(uint8* dst_argb, int dst_stride_argb, 1372 int dst_x, int dst_y, int width, int height) { 1373 int y; 1374 void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C; 1375 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1376 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { 1377 return -1; 1378 } 1379 // Coalesce rows. 1380 if (dst_stride_argb == width * 4) { 1381 width *= height; 1382 height = 1; 1383 dst_stride_argb = 0; 1384 } 1385 #if defined(HAS_ARGBSEPIAROW_SSSE3) 1386 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && 1387 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1388 ARGBSepiaRow = ARGBSepiaRow_SSSE3; 1389 } 1390 #elif defined(HAS_ARGBSEPIAROW_NEON) 1391 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1392 ARGBSepiaRow = ARGBSepiaRow_NEON; 1393 } 1394 #endif 1395 for (y = 0; y < height; ++y) { 1396 ARGBSepiaRow(dst, width); 1397 dst += dst_stride_argb; 1398 } 1399 return 0; 1400 } 1401 1402 // Apply a 4x4 matrix to each ARGB pixel. 1403 // Note: Normally for shading, but can be used to swizzle or invert. 1404 LIBYUV_API 1405 int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb, 1406 uint8* dst_argb, int dst_stride_argb, 1407 const int8* matrix_argb, 1408 int width, int height) { 1409 int y; 1410 void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb, 1411 const int8* matrix_argb, int width) = ARGBColorMatrixRow_C; 1412 if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) { 1413 return -1; 1414 } 1415 if (height < 0) { 1416 height = -height; 1417 src_argb = src_argb + (height - 1) * src_stride_argb; 1418 src_stride_argb = -src_stride_argb; 1419 } 1420 // Coalesce rows. 1421 if (src_stride_argb == width * 4 && 1422 dst_stride_argb == width * 4) { 1423 width *= height; 1424 height = 1; 1425 src_stride_argb = dst_stride_argb = 0; 1426 } 1427 #if defined(HAS_ARGBCOLORMATRIXROW_SSSE3) 1428 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && 1429 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1430 ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3; 1431 } 1432 #elif defined(HAS_ARGBCOLORMATRIXROW_NEON) 1433 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1434 ARGBColorMatrixRow = ARGBColorMatrixRow_NEON; 1435 } 1436 #endif 1437 for (y = 0; y < height; ++y) { 1438 ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width); 1439 src_argb += src_stride_argb; 1440 dst_argb += dst_stride_argb; 1441 } 1442 return 0; 1443 } 1444 1445 // Apply a 4x3 matrix to each ARGB pixel. 1446 // Deprecated. 1447 LIBYUV_API 1448 int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb, 1449 const int8* matrix_rgb, 1450 int dst_x, int dst_y, int width, int height) { 1451 SIMD_ALIGNED(int8 matrix_argb[16]); 1452 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1453 if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || 1454 dst_x < 0 || dst_y < 0) { 1455 return -1; 1456 } 1457 1458 // Convert 4x3 7 bit matrix to 4x4 6 bit matrix. 1459 matrix_argb[0] = matrix_rgb[0] / 2; 1460 matrix_argb[1] = matrix_rgb[1] / 2; 1461 matrix_argb[2] = matrix_rgb[2] / 2; 1462 matrix_argb[3] = matrix_rgb[3] / 2; 1463 matrix_argb[4] = matrix_rgb[4] / 2; 1464 matrix_argb[5] = matrix_rgb[5] / 2; 1465 matrix_argb[6] = matrix_rgb[6] / 2; 1466 matrix_argb[7] = matrix_rgb[7] / 2; 1467 matrix_argb[8] = matrix_rgb[8] / 2; 1468 matrix_argb[9] = matrix_rgb[9] / 2; 1469 matrix_argb[10] = matrix_rgb[10] / 2; 1470 matrix_argb[11] = matrix_rgb[11] / 2; 1471 matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0; 1472 matrix_argb[15] = 64; // 1.0 1473 1474 return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb, 1475 dst, dst_stride_argb, 1476 &matrix_argb[0], width, height); 1477 } 1478 1479 // Apply a color table each ARGB pixel. 1480 // Table contains 256 ARGB values. 1481 LIBYUV_API 1482 int ARGBColorTable(uint8* dst_argb, int dst_stride_argb, 1483 const uint8* table_argb, 1484 int dst_x, int dst_y, int width, int height) { 1485 int y; 1486 void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb, 1487 int width) = ARGBColorTableRow_C; 1488 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1489 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || 1490 dst_x < 0 || dst_y < 0) { 1491 return -1; 1492 } 1493 // Coalesce rows. 1494 if (dst_stride_argb == width * 4) { 1495 width *= height; 1496 height = 1; 1497 dst_stride_argb = 0; 1498 } 1499 #if defined(HAS_ARGBCOLORTABLEROW_X86) 1500 if (TestCpuFlag(kCpuHasX86)) { 1501 ARGBColorTableRow = ARGBColorTableRow_X86; 1502 } 1503 #endif 1504 for (y = 0; y < height; ++y) { 1505 ARGBColorTableRow(dst, table_argb, width); 1506 dst += dst_stride_argb; 1507 } 1508 return 0; 1509 } 1510 1511 // Apply a color table each ARGB pixel but preserve destination alpha. 1512 // Table contains 256 ARGB values. 1513 LIBYUV_API 1514 int RGBColorTable(uint8* dst_argb, int dst_stride_argb, 1515 const uint8* table_argb, 1516 int dst_x, int dst_y, int width, int height) { 1517 int y; 1518 void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb, 1519 int width) = RGBColorTableRow_C; 1520 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1521 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || 1522 dst_x < 0 || dst_y < 0) { 1523 return -1; 1524 } 1525 // Coalesce rows. 1526 if (dst_stride_argb == width * 4) { 1527 width *= height; 1528 height = 1; 1529 dst_stride_argb = 0; 1530 } 1531 #if defined(HAS_RGBCOLORTABLEROW_X86) 1532 if (TestCpuFlag(kCpuHasX86)) { 1533 RGBColorTableRow = RGBColorTableRow_X86; 1534 } 1535 #endif 1536 for (y = 0; y < height; ++y) { 1537 RGBColorTableRow(dst, table_argb, width); 1538 dst += dst_stride_argb; 1539 } 1540 return 0; 1541 } 1542 1543 // ARGBQuantize is used to posterize art. 1544 // e.g. rgb / qvalue * qvalue + qvalue / 2 1545 // But the low levels implement efficiently with 3 parameters, and could be 1546 // used for other high level operations. 1547 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; 1548 // where scale is 1 / interval_size as a fixed point value. 1549 // The divide is replaces with a multiply by reciprocal fixed point multiply. 1550 // Caveat - although SSE2 saturates, the C function does not and should be used 1551 // with care if doing anything but quantization. 1552 LIBYUV_API 1553 int ARGBQuantize(uint8* dst_argb, int dst_stride_argb, 1554 int scale, int interval_size, int interval_offset, 1555 int dst_x, int dst_y, int width, int height) { 1556 int y; 1557 void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size, 1558 int interval_offset, int width) = ARGBQuantizeRow_C; 1559 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1560 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 || 1561 interval_size < 1 || interval_size > 255) { 1562 return -1; 1563 } 1564 // Coalesce rows. 1565 if (dst_stride_argb == width * 4) { 1566 width *= height; 1567 height = 1; 1568 dst_stride_argb = 0; 1569 } 1570 #if defined(HAS_ARGBQUANTIZEROW_SSE2) 1571 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) && 1572 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1573 ARGBQuantizeRow = ARGBQuantizeRow_SSE2; 1574 } 1575 #elif defined(HAS_ARGBQUANTIZEROW_NEON) 1576 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1577 ARGBQuantizeRow = ARGBQuantizeRow_NEON; 1578 } 1579 #endif 1580 for (y = 0; y < height; ++y) { 1581 ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width); 1582 dst += dst_stride_argb; 1583 } 1584 return 0; 1585 } 1586 1587 // Computes table of cumulative sum for image where the value is the sum 1588 // of all values above and to the left of the entry. Used by ARGBBlur. 1589 LIBYUV_API 1590 int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb, 1591 int32* dst_cumsum, int dst_stride32_cumsum, 1592 int width, int height) { 1593 int y; 1594 void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum, 1595 const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C; 1596 int32* previous_cumsum = dst_cumsum; 1597 if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) { 1598 return -1; 1599 } 1600 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2) 1601 if (TestCpuFlag(kCpuHasSSE2)) { 1602 ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; 1603 } 1604 #endif 1605 memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel. 1606 for (y = 0; y < height; ++y) { 1607 ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width); 1608 previous_cumsum = dst_cumsum; 1609 dst_cumsum += dst_stride32_cumsum; 1610 src_argb += src_stride_argb; 1611 } 1612 return 0; 1613 } 1614 1615 // Blur ARGB image. 1616 // Caller should allocate CumulativeSum table of width * height * 16 bytes 1617 // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory 1618 // as the buffer is treated as circular. 1619 LIBYUV_API 1620 int ARGBBlur(const uint8* src_argb, int src_stride_argb, 1621 uint8* dst_argb, int dst_stride_argb, 1622 int32* dst_cumsum, int dst_stride32_cumsum, 1623 int width, int height, int radius) { 1624 int y; 1625 void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum, 1626 const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C; 1627 void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft, 1628 int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C; 1629 int32* cumsum_bot_row; 1630 int32* max_cumsum_bot_row; 1631 int32* cumsum_top_row; 1632 1633 if (!src_argb || !dst_argb || width <= 0 || height == 0) { 1634 return -1; 1635 } 1636 if (height < 0) { 1637 height = -height; 1638 src_argb = src_argb + (height - 1) * src_stride_argb; 1639 src_stride_argb = -src_stride_argb; 1640 } 1641 if (radius > height) { 1642 radius = height; 1643 } 1644 if (radius > (width / 2 - 1)) { 1645 radius = width / 2 - 1; 1646 } 1647 if (radius <= 0) { 1648 return -1; 1649 } 1650 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2) 1651 if (TestCpuFlag(kCpuHasSSE2)) { 1652 ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; 1653 CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2; 1654 } 1655 #endif 1656 // Compute enough CumulativeSum for first row to be blurred. After this 1657 // one row of CumulativeSum is updated at a time. 1658 ARGBComputeCumulativeSum(src_argb, src_stride_argb, 1659 dst_cumsum, dst_stride32_cumsum, 1660 width, radius); 1661 1662 src_argb = src_argb + radius * src_stride_argb; 1663 cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum]; 1664 1665 max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum]; 1666 cumsum_top_row = &dst_cumsum[0]; 1667 1668 for (y = 0; y < height; ++y) { 1669 int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0; 1670 int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1); 1671 int area = radius * (bot_y - top_y); 1672 int boxwidth = radius * 4; 1673 int x; 1674 int n; 1675 1676 // Increment cumsum_top_row pointer with circular buffer wrap around. 1677 if (top_y) { 1678 cumsum_top_row += dst_stride32_cumsum; 1679 if (cumsum_top_row >= max_cumsum_bot_row) { 1680 cumsum_top_row = dst_cumsum; 1681 } 1682 } 1683 // Increment cumsum_bot_row pointer with circular buffer wrap around and 1684 // then fill in a row of CumulativeSum. 1685 if ((y + radius) < height) { 1686 const int32* prev_cumsum_bot_row = cumsum_bot_row; 1687 cumsum_bot_row += dst_stride32_cumsum; 1688 if (cumsum_bot_row >= max_cumsum_bot_row) { 1689 cumsum_bot_row = dst_cumsum; 1690 } 1691 ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row, 1692 width); 1693 src_argb += src_stride_argb; 1694 } 1695 1696 // Left clipped. 1697 for (x = 0; x < radius + 1; ++x) { 1698 CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, 1699 boxwidth, area, &dst_argb[x * 4], 1); 1700 area += (bot_y - top_y); 1701 boxwidth += 4; 1702 } 1703 1704 // Middle unclipped. 1705 n = (width - 1) - radius - x + 1; 1706 CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, 1707 boxwidth, area, &dst_argb[x * 4], n); 1708 1709 // Right clipped. 1710 for (x += n; x <= width - 1; ++x) { 1711 area -= (bot_y - top_y); 1712 boxwidth -= 4; 1713 CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4, 1714 cumsum_bot_row + (x - radius - 1) * 4, 1715 boxwidth, area, &dst_argb[x * 4], 1); 1716 } 1717 dst_argb += dst_stride_argb; 1718 } 1719 return 0; 1720 } 1721 1722 // Multiply ARGB image by a specified ARGB value. 1723 LIBYUV_API 1724 int ARGBShade(const uint8* src_argb, int src_stride_argb, 1725 uint8* dst_argb, int dst_stride_argb, 1726 int width, int height, uint32 value) { 1727 int y; 1728 void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb, 1729 int width, uint32 value) = ARGBShadeRow_C; 1730 if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) { 1731 return -1; 1732 } 1733 if (height < 0) { 1734 height = -height; 1735 src_argb = src_argb + (height - 1) * src_stride_argb; 1736 src_stride_argb = -src_stride_argb; 1737 } 1738 // Coalesce rows. 1739 if (src_stride_argb == width * 4 && 1740 dst_stride_argb == width * 4) { 1741 width *= height; 1742 height = 1; 1743 src_stride_argb = dst_stride_argb = 0; 1744 } 1745 #if defined(HAS_ARGBSHADEROW_SSE2) 1746 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) && 1747 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && 1748 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1749 ARGBShadeRow = ARGBShadeRow_SSE2; 1750 } 1751 #elif defined(HAS_ARGBSHADEROW_NEON) 1752 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1753 ARGBShadeRow = ARGBShadeRow_NEON; 1754 } 1755 #endif 1756 1757 for (y = 0; y < height; ++y) { 1758 ARGBShadeRow(src_argb, dst_argb, width, value); 1759 src_argb += src_stride_argb; 1760 dst_argb += dst_stride_argb; 1761 } 1762 return 0; 1763 } 1764 1765 // Interpolate 2 ARGB images by specified amount (0 to 255). 1766 LIBYUV_API 1767 int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, 1768 const uint8* src_argb1, int src_stride_argb1, 1769 uint8* dst_argb, int dst_stride_argb, 1770 int width, int height, int interpolation) { 1771 int y; 1772 void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, 1773 ptrdiff_t src_stride, int dst_width, 1774 int source_y_fraction) = InterpolateRow_C; 1775 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { 1776 return -1; 1777 } 1778 // Negative height means invert the image. 1779 if (height < 0) { 1780 height = -height; 1781 dst_argb = dst_argb + (height - 1) * dst_stride_argb; 1782 dst_stride_argb = -dst_stride_argb; 1783 } 1784 // Coalesce rows. 1785 if (src_stride_argb0 == width * 4 && 1786 src_stride_argb1 == width * 4 && 1787 dst_stride_argb == width * 4) { 1788 width *= height; 1789 height = 1; 1790 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; 1791 } 1792 #if defined(HAS_INTERPOLATEROW_SSE2) 1793 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { 1794 InterpolateRow = InterpolateRow_Any_SSE2; 1795 if (IS_ALIGNED(width, 4)) { 1796 InterpolateRow = InterpolateRow_Unaligned_SSE2; 1797 if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && 1798 IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) && 1799 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1800 InterpolateRow = InterpolateRow_SSE2; 1801 } 1802 } 1803 } 1804 #endif 1805 #if defined(HAS_INTERPOLATEROW_SSSE3) 1806 if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) { 1807 InterpolateRow = InterpolateRow_Any_SSSE3; 1808 if (IS_ALIGNED(width, 4)) { 1809 InterpolateRow = InterpolateRow_Unaligned_SSSE3; 1810 if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && 1811 IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) && 1812 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1813 InterpolateRow = InterpolateRow_SSSE3; 1814 } 1815 } 1816 } 1817 #endif 1818 #if defined(HAS_INTERPOLATEROW_AVX2) 1819 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { 1820 InterpolateRow = InterpolateRow_Any_AVX2; 1821 if (IS_ALIGNED(width, 8)) { 1822 InterpolateRow = InterpolateRow_AVX2; 1823 } 1824 } 1825 #endif 1826 #if defined(HAS_INTERPOLATEROW_NEON) 1827 if (TestCpuFlag(kCpuHasNEON) && width >= 4) { 1828 InterpolateRow = InterpolateRow_Any_NEON; 1829 if (IS_ALIGNED(width, 4)) { 1830 InterpolateRow = InterpolateRow_NEON; 1831 } 1832 } 1833 #endif 1834 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) 1835 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 && 1836 IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) && 1837 IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) && 1838 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { 1839 ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2; 1840 } 1841 #endif 1842 1843 for (y = 0; y < height; ++y) { 1844 InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0, 1845 width * 4, interpolation); 1846 src_argb0 += src_stride_argb0; 1847 src_argb1 += src_stride_argb1; 1848 dst_argb += dst_stride_argb; 1849 } 1850 return 0; 1851 } 1852 1853 // Shuffle ARGB channel order. e.g. BGRA to ARGB. 1854 LIBYUV_API 1855 int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra, 1856 uint8* dst_argb, int dst_stride_argb, 1857 const uint8* shuffler, int width, int height) { 1858 int y; 1859 void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb, 1860 const uint8* shuffler, int pix) = ARGBShuffleRow_C; 1861 if (!src_bgra || !dst_argb || 1862 width <= 0 || height == 0) { 1863 return -1; 1864 } 1865 // Negative height means invert the image. 1866 if (height < 0) { 1867 height = -height; 1868 src_bgra = src_bgra + (height - 1) * src_stride_bgra; 1869 src_stride_bgra = -src_stride_bgra; 1870 } 1871 // Coalesce rows. 1872 if (src_stride_bgra == width * 4 && 1873 dst_stride_argb == width * 4) { 1874 width *= height; 1875 height = 1; 1876 src_stride_bgra = dst_stride_argb = 0; 1877 } 1878 #if defined(HAS_ARGBSHUFFLEROW_SSE2) 1879 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { 1880 ARGBShuffleRow = ARGBShuffleRow_Any_SSE2; 1881 if (IS_ALIGNED(width, 4)) { 1882 ARGBShuffleRow = ARGBShuffleRow_SSE2; 1883 } 1884 } 1885 #endif 1886 #if defined(HAS_ARGBSHUFFLEROW_SSSE3) 1887 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { 1888 ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3; 1889 if (IS_ALIGNED(width, 8)) { 1890 ARGBShuffleRow = ARGBShuffleRow_Unaligned_SSSE3; 1891 if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16) && 1892 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1893 ARGBShuffleRow = ARGBShuffleRow_SSSE3; 1894 } 1895 } 1896 } 1897 #endif 1898 #if defined(HAS_ARGBSHUFFLEROW_AVX2) 1899 if (TestCpuFlag(kCpuHasAVX2) && width >= 16) { 1900 ARGBShuffleRow = ARGBShuffleRow_Any_AVX2; 1901 if (IS_ALIGNED(width, 16)) { 1902 ARGBShuffleRow = ARGBShuffleRow_AVX2; 1903 } 1904 } 1905 #endif 1906 #if defined(HAS_ARGBSHUFFLEROW_NEON) 1907 if (TestCpuFlag(kCpuHasNEON) && width >= 4) { 1908 ARGBShuffleRow = ARGBShuffleRow_Any_NEON; 1909 if (IS_ALIGNED(width, 4)) { 1910 ARGBShuffleRow = ARGBShuffleRow_NEON; 1911 } 1912 } 1913 #endif 1914 1915 for (y = 0; y < height; ++y) { 1916 ARGBShuffleRow(src_bgra, dst_argb, shuffler, width); 1917 src_bgra += src_stride_bgra; 1918 dst_argb += dst_stride_argb; 1919 } 1920 return 0; 1921 } 1922 1923 // Sobel ARGB effect. 1924 static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, 1925 uint8* dst_argb, int dst_stride_argb, 1926 int width, int height, 1927 void (*SobelRow)(const uint8* src_sobelx, 1928 const uint8* src_sobely, 1929 uint8* dst, int width)) { 1930 int y; 1931 void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer, 1932 uint32 selector, int pix) = ARGBToBayerGGRow_C; 1933 void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1, 1934 uint8* dst_sobely, int width) = SobelYRow_C; 1935 void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1, 1936 const uint8* src_y2, uint8* dst_sobely, int width) = 1937 SobelXRow_C; 1938 const int kEdge = 16; // Extra pixels at start of row for extrude/align. 1939 if (!src_argb || !dst_argb || width <= 0 || height == 0) { 1940 return -1; 1941 } 1942 // Negative height means invert the image. 1943 if (height < 0) { 1944 height = -height; 1945 src_argb = src_argb + (height - 1) * src_stride_argb; 1946 src_stride_argb = -src_stride_argb; 1947 } 1948 // ARGBToBayer used to select G channel from ARGB. 1949 #if defined(HAS_ARGBTOBAYERGGROW_SSE2) 1950 if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && 1951 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { 1952 ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2; 1953 if (IS_ALIGNED(width, 8)) { 1954 ARGBToBayerRow = ARGBToBayerGGRow_SSE2; 1955 } 1956 } 1957 #endif 1958 #if defined(HAS_ARGBTOBAYERROW_SSSE3) 1959 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 && 1960 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { 1961 ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3; 1962 if (IS_ALIGNED(width, 8)) { 1963 ARGBToBayerRow = ARGBToBayerRow_SSSE3; 1964 } 1965 } 1966 #endif 1967 #if defined(HAS_ARGBTOBAYERGGROW_NEON) 1968 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 1969 ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON; 1970 if (IS_ALIGNED(width, 8)) { 1971 ARGBToBayerRow = ARGBToBayerGGRow_NEON; 1972 } 1973 } 1974 #endif 1975 #if defined(HAS_SOBELYROW_SSE2) 1976 if (TestCpuFlag(kCpuHasSSE2)) { 1977 SobelYRow = SobelYRow_SSE2; 1978 } 1979 #endif 1980 #if defined(HAS_SOBELYROW_NEON) 1981 if (TestCpuFlag(kCpuHasNEON)) { 1982 SobelYRow = SobelYRow_NEON; 1983 } 1984 #endif 1985 #if defined(HAS_SOBELXROW_SSE2) 1986 if (TestCpuFlag(kCpuHasSSE2)) { 1987 SobelXRow = SobelXRow_SSE2; 1988 } 1989 #endif 1990 #if defined(HAS_SOBELXROW_NEON) 1991 if (TestCpuFlag(kCpuHasNEON)) { 1992 SobelXRow = SobelXRow_NEON; 1993 } 1994 #endif 1995 { 1996 // 3 rows with edges before/after. 1997 const int kRowSize = (width + kEdge + 15) & ~15; 1998 align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge)); 1999 uint8* row_sobelx = rows; 2000 uint8* row_sobely = rows + kRowSize; 2001 uint8* row_y = rows + kRowSize * 2; 2002 2003 // Convert first row. 2004 uint8* row_y0 = row_y + kEdge; 2005 uint8* row_y1 = row_y0 + kRowSize; 2006 uint8* row_y2 = row_y1 + kRowSize; 2007 ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width); 2008 row_y0[-1] = row_y0[0]; 2009 memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. 2010 ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width); 2011 row_y1[-1] = row_y1[0]; 2012 memset(row_y1 + width, row_y1[width - 1], 16); 2013 memset(row_y2 + width, 0, 16); 2014 2015 for (y = 0; y < height; ++y) { 2016 // Convert next row of ARGB to Y. 2017 if (y < (height - 1)) { 2018 src_argb += src_stride_argb; 2019 } 2020 ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width); 2021 row_y2[-1] = row_y2[0]; 2022 row_y2[width] = row_y2[width - 1]; 2023 2024 SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width); 2025 SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width); 2026 SobelRow(row_sobelx, row_sobely, dst_argb, width); 2027 2028 // Cycle thru circular queue of 3 row_y buffers. 2029 { 2030 uint8* row_yt = row_y0; 2031 row_y0 = row_y1; 2032 row_y1 = row_y2; 2033 row_y2 = row_yt; 2034 } 2035 2036 dst_argb += dst_stride_argb; 2037 } 2038 free_aligned_buffer_64(rows); 2039 } 2040 return 0; 2041 } 2042 2043 // Sobel ARGB effect. 2044 LIBYUV_API 2045 int ARGBSobel(const uint8* src_argb, int src_stride_argb, 2046 uint8* dst_argb, int dst_stride_argb, 2047 int width, int height) { 2048 void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely, 2049 uint8* dst_argb, int width) = SobelRow_C; 2050 #if defined(HAS_SOBELROW_SSE2) 2051 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && 2052 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 2053 SobelRow = SobelRow_SSE2; 2054 } 2055 #endif 2056 #if defined(HAS_SOBELROW_NEON) 2057 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 2058 SobelRow = SobelRow_NEON; 2059 } 2060 #endif 2061 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, 2062 width, height, SobelRow); 2063 } 2064 2065 // Sobel ARGB effect with planar output. 2066 LIBYUV_API 2067 int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb, 2068 uint8* dst_y, int dst_stride_y, 2069 int width, int height) { 2070 void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely, 2071 uint8* dst_, int width) = SobelToPlaneRow_C; 2072 #if defined(HAS_SOBELTOPLANEROW_SSE2) 2073 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && 2074 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { 2075 SobelToPlaneRow = SobelToPlaneRow_SSE2; 2076 } 2077 #endif 2078 #if defined(HAS_SOBELTOPLANEROW_NEON) 2079 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { 2080 SobelToPlaneRow = SobelToPlaneRow_NEON; 2081 } 2082 #endif 2083 return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, 2084 width, height, SobelToPlaneRow); 2085 } 2086 2087 // SobelXY ARGB effect. 2088 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel. 2089 LIBYUV_API 2090 int ARGBSobelXY(const uint8* src_argb, int src_stride_argb, 2091 uint8* dst_argb, int dst_stride_argb, 2092 int width, int height) { 2093 void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely, 2094 uint8* dst_argb, int width) = SobelXYRow_C; 2095 #if defined(HAS_SOBELXYROW_SSE2) 2096 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && 2097 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 2098 SobelXYRow = SobelXYRow_SSE2; 2099 } 2100 #endif 2101 #if defined(HAS_SOBELXYROW_NEON) 2102 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 2103 SobelXYRow = SobelXYRow_NEON; 2104 } 2105 #endif 2106 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, 2107 width, height, SobelXYRow); 2108 } 2109 2110 // Apply a 4x4 polynomial to each ARGB pixel. 2111 LIBYUV_API 2112 int ARGBPolynomial(const uint8* src_argb, int src_stride_argb, 2113 uint8* dst_argb, int dst_stride_argb, 2114 const float* poly, 2115 int width, int height) { 2116 int y; 2117 void (*ARGBPolynomialRow)(const uint8* src_argb, 2118 uint8* dst_argb, const float* poly, 2119 int width) = ARGBPolynomialRow_C; 2120 if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) { 2121 return -1; 2122 } 2123 // Negative height means invert the image. 2124 if (height < 0) { 2125 height = -height; 2126 src_argb = src_argb + (height - 1) * src_stride_argb; 2127 src_stride_argb = -src_stride_argb; 2128 } 2129 // Coalesce rows. 2130 if (src_stride_argb == width * 4 && 2131 dst_stride_argb == width * 4) { 2132 width *= height; 2133 height = 1; 2134 src_stride_argb = dst_stride_argb = 0; 2135 } 2136 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2) 2137 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) { 2138 ARGBPolynomialRow = ARGBPolynomialRow_SSE2; 2139 } 2140 #endif 2141 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2) 2142 if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) && 2143 IS_ALIGNED(width, 2)) { 2144 ARGBPolynomialRow = ARGBPolynomialRow_AVX2; 2145 } 2146 #endif 2147 2148 for (y = 0; y < height; ++y) { 2149 ARGBPolynomialRow(src_argb, dst_argb, poly, width); 2150 src_argb += src_stride_argb; 2151 dst_argb += dst_stride_argb; 2152 } 2153 return 0; 2154 } 2155 2156 // Apply a lumacolortable to each ARGB pixel. 2157 LIBYUV_API 2158 int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb, 2159 uint8* dst_argb, int dst_stride_argb, 2160 const uint8* luma, 2161 int width, int height) { 2162 int y; 2163 void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb, 2164 int width, const uint8* luma, const uint32 lumacoeff) = 2165 ARGBLumaColorTableRow_C; 2166 if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) { 2167 return -1; 2168 } 2169 // Negative height means invert the image. 2170 if (height < 0) { 2171 height = -height; 2172 src_argb = src_argb + (height - 1) * src_stride_argb; 2173 src_stride_argb = -src_stride_argb; 2174 } 2175 // Coalesce rows. 2176 if (src_stride_argb == width * 4 && 2177 dst_stride_argb == width * 4) { 2178 width *= height; 2179 height = 1; 2180 src_stride_argb = dst_stride_argb = 0; 2181 } 2182 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3) 2183 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) { 2184 ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3; 2185 } 2186 #endif 2187 2188 for (y = 0; y < height; ++y) { 2189 ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f); 2190 src_argb += src_stride_argb; 2191 dst_argb += dst_stride_argb; 2192 } 2193 return 0; 2194 } 2195 2196 // Copy Alpha from one ARGB image to another. 2197 LIBYUV_API 2198 int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, 2199 uint8* dst_argb, int dst_stride_argb, 2200 int width, int height) { 2201 int y; 2202 void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) = 2203 ARGBCopyAlphaRow_C; 2204 if (!src_argb || !dst_argb || width <= 0 || height == 0) { 2205 return -1; 2206 } 2207 // Negative height means invert the image. 2208 if (height < 0) { 2209 height = -height; 2210 src_argb = src_argb + (height - 1) * src_stride_argb; 2211 src_stride_argb = -src_stride_argb; 2212 } 2213 // Coalesce rows. 2214 if (src_stride_argb == width * 4 && 2215 dst_stride_argb == width * 4) { 2216 width *= height; 2217 height = 1; 2218 src_stride_argb = dst_stride_argb = 0; 2219 } 2220 #if defined(HAS_ARGBCOPYALPHAROW_SSE2) 2221 if (TestCpuFlag(kCpuHasSSE2) && 2222 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && 2223 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) && 2224 IS_ALIGNED(width, 8)) { 2225 ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2; 2226 } 2227 #endif 2228 #if defined(HAS_ARGBCOPYALPHAROW_AVX2) 2229 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) { 2230 ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2; 2231 } 2232 #endif 2233 2234 for (y = 0; y < height; ++y) { 2235 ARGBCopyAlphaRow(src_argb, dst_argb, width); 2236 src_argb += src_stride_argb; 2237 dst_argb += dst_stride_argb; 2238 } 2239 return 0; 2240 } 2241 2242 // Copy a planar Y channel to the alpha channel of a destination ARGB image. 2243 LIBYUV_API 2244 int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, 2245 uint8* dst_argb, int dst_stride_argb, 2246 int width, int height) { 2247 int y; 2248 void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) = 2249 ARGBCopyYToAlphaRow_C; 2250 if (!src_y || !dst_argb || width <= 0 || height == 0) { 2251 return -1; 2252 } 2253 // Negative height means invert the image. 2254 if (height < 0) { 2255 height = -height; 2256 src_y = src_y + (height - 1) * src_stride_y; 2257 src_stride_y = -src_stride_y; 2258 } 2259 // Coalesce rows. 2260 if (src_stride_y == width && 2261 dst_stride_argb == width * 4) { 2262 width *= height; 2263 height = 1; 2264 src_stride_y = dst_stride_argb = 0; 2265 } 2266 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2) 2267 if (TestCpuFlag(kCpuHasSSE2) && 2268 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && 2269 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) && 2270 IS_ALIGNED(width, 8)) { 2271 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2; 2272 } 2273 #endif 2274 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2) 2275 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) { 2276 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2; 2277 } 2278 #endif 2279 2280 for (y = 0; y < height; ++y) { 2281 ARGBCopyYToAlphaRow(src_y, dst_argb, width); 2282 src_y += src_stride_y; 2283 dst_argb += dst_stride_argb; 2284 } 2285 return 0; 2286 } 2287 2288 #ifdef __cplusplus 2289 } // extern "C" 2290 } // namespace libyuv 2291 #endif 2292