1 /* 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "libyuv/planar_functions.h" 12 13 #include <string.h> // for memset() 14 15 #include "libyuv/cpu_id.h" 16 #ifdef HAVE_JPEG 17 #include "libyuv/mjpeg_decoder.h" 18 #endif 19 #include "libyuv/row.h" 20 21 #ifdef __cplusplus 22 namespace libyuv { 23 extern "C" { 24 #endif 25 26 // Copy a plane of data 27 LIBYUV_API 28 void CopyPlane(const uint8* src_y, int src_stride_y, 29 uint8* dst_y, int dst_stride_y, 30 int width, int height) { 31 int y; 32 void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; 33 // Coalesce rows. 34 if (src_stride_y == width && 35 dst_stride_y == width) { 36 width *= height; 37 height = 1; 38 src_stride_y = dst_stride_y = 0; 39 } 40 #if defined(HAS_COPYROW_X86) 41 if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { 42 CopyRow = CopyRow_X86; 43 } 44 #endif 45 #if defined(HAS_COPYROW_SSE2) 46 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && 47 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && 48 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { 49 CopyRow = CopyRow_SSE2; 50 } 51 #endif 52 #if defined(HAS_COPYROW_ERMS) 53 if (TestCpuFlag(kCpuHasERMS)) { 54 CopyRow = CopyRow_ERMS; 55 } 56 #endif 57 #if defined(HAS_COPYROW_NEON) 58 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { 59 CopyRow = CopyRow_NEON; 60 } 61 #endif 62 #if defined(HAS_COPYROW_MIPS) 63 if (TestCpuFlag(kCpuHasMIPS)) { 64 CopyRow = CopyRow_MIPS; 65 } 66 #endif 67 68 // Copy plane 69 for (y = 0; y < height; ++y) { 70 CopyRow(src_y, dst_y, width); 71 src_y += src_stride_y; 72 dst_y += dst_stride_y; 73 } 74 } 75 76 LIBYUV_API 77 void CopyPlane_16(const uint16* src_y, int src_stride_y, 78 uint16* dst_y, int dst_stride_y, 79 int width, int height) { 80 int y; 81 void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C; 82 // Coalesce rows. 83 if (src_stride_y == width && 84 dst_stride_y == width) { 85 width *= height; 86 height = 1; 87 src_stride_y = dst_stride_y = 0; 88 } 89 #if defined(HAS_COPYROW_16_X86) 90 if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { 91 CopyRow = CopyRow_16_X86; 92 } 93 #endif 94 #if defined(HAS_COPYROW_16_SSE2) 95 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && 96 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && 97 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { 98 CopyRow = CopyRow_16_SSE2; 99 } 100 #endif 101 #if defined(HAS_COPYROW_16_ERMS) 102 if (TestCpuFlag(kCpuHasERMS)) { 103 CopyRow = CopyRow_16_ERMS; 104 } 105 #endif 106 #if defined(HAS_COPYROW_16_NEON) 107 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { 108 CopyRow = CopyRow_16_NEON; 109 } 110 #endif 111 #if defined(HAS_COPYROW_16_MIPS) 112 if (TestCpuFlag(kCpuHasMIPS)) { 113 CopyRow = CopyRow_16_MIPS; 114 } 115 #endif 116 117 // Copy plane 118 for (y = 0; y < height; ++y) { 119 CopyRow(src_y, dst_y, width); 120 src_y += src_stride_y; 121 dst_y += dst_stride_y; 122 } 123 } 124 125 // Copy I422. 126 LIBYUV_API 127 int I422Copy(const uint8* src_y, int src_stride_y, 128 const uint8* src_u, int src_stride_u, 129 const uint8* src_v, int src_stride_v, 130 uint8* dst_y, int dst_stride_y, 131 uint8* dst_u, int dst_stride_u, 132 uint8* dst_v, int dst_stride_v, 133 int width, int height) { 134 int halfwidth = (width + 1) >> 1; 135 if (!src_y || !src_u || !src_v || 136 !dst_y || !dst_u || !dst_v || 137 width <= 0 || height == 0) { 138 return -1; 139 } 140 // Negative height means invert the image. 141 if (height < 0) { 142 height = -height; 143 src_y = src_y + (height - 1) * src_stride_y; 144 src_u = src_u + (height - 1) * src_stride_u; 145 src_v = src_v + (height - 1) * src_stride_v; 146 src_stride_y = -src_stride_y; 147 src_stride_u = -src_stride_u; 148 src_stride_v = -src_stride_v; 149 } 150 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); 151 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height); 152 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height); 153 return 0; 154 } 155 156 // Copy I444. 157 LIBYUV_API 158 int I444Copy(const uint8* src_y, int src_stride_y, 159 const uint8* src_u, int src_stride_u, 160 const uint8* src_v, int src_stride_v, 161 uint8* dst_y, int dst_stride_y, 162 uint8* dst_u, int dst_stride_u, 163 uint8* dst_v, int dst_stride_v, 164 int width, int height) { 165 if (!src_y || !src_u || !src_v || 166 !dst_y || !dst_u || !dst_v || 167 width <= 0 || height == 0) { 168 return -1; 169 } 170 // Negative height means invert the image. 171 if (height < 0) { 172 height = -height; 173 src_y = src_y + (height - 1) * src_stride_y; 174 src_u = src_u + (height - 1) * src_stride_u; 175 src_v = src_v + (height - 1) * src_stride_v; 176 src_stride_y = -src_stride_y; 177 src_stride_u = -src_stride_u; 178 src_stride_v = -src_stride_v; 179 } 180 181 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); 182 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height); 183 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height); 184 return 0; 185 } 186 187 // Copy I400. 188 LIBYUV_API 189 int I400ToI400(const uint8* src_y, int src_stride_y, 190 uint8* dst_y, int dst_stride_y, 191 int width, int height) { 192 if (!src_y || !dst_y || width <= 0 || height == 0) { 193 return -1; 194 } 195 // Negative height means invert the image. 196 if (height < 0) { 197 height = -height; 198 src_y = src_y + (height - 1) * src_stride_y; 199 src_stride_y = -src_stride_y; 200 } 201 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); 202 return 0; 203 } 204 205 // Convert I420 to I400. 206 LIBYUV_API 207 int I420ToI400(const uint8* src_y, int src_stride_y, 208 const uint8* src_u, int src_stride_u, 209 const uint8* src_v, int src_stride_v, 210 uint8* dst_y, int dst_stride_y, 211 int width, int height) { 212 if (!src_y || !dst_y || width <= 0 || height == 0) { 213 return -1; 214 } 215 // Negative height means invert the image. 216 if (height < 0) { 217 height = -height; 218 src_y = src_y + (height - 1) * src_stride_y; 219 src_stride_y = -src_stride_y; 220 } 221 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); 222 return 0; 223 } 224 225 // Mirror a plane of data. 226 void MirrorPlane(const uint8* src_y, int src_stride_y, 227 uint8* dst_y, int dst_stride_y, 228 int width, int height) { 229 int y; 230 void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C; 231 // Negative height means invert the image. 232 if (height < 0) { 233 height = -height; 234 src_y = src_y + (height - 1) * src_stride_y; 235 src_stride_y = -src_stride_y; 236 } 237 #if defined(HAS_MIRRORROW_NEON) 238 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { 239 MirrorRow = MirrorRow_NEON; 240 } 241 #endif 242 #if defined(HAS_MIRRORROW_SSE2) 243 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) { 244 MirrorRow = MirrorRow_SSE2; 245 } 246 #endif 247 #if defined(HAS_MIRRORROW_SSSE3) 248 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) && 249 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && 250 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { 251 MirrorRow = MirrorRow_SSSE3; 252 } 253 #endif 254 #if defined(HAS_MIRRORROW_AVX2) 255 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) { 256 MirrorRow = MirrorRow_AVX2; 257 } 258 #endif 259 260 // Mirror plane 261 for (y = 0; y < height; ++y) { 262 MirrorRow(src_y, dst_y, width); 263 src_y += src_stride_y; 264 dst_y += dst_stride_y; 265 } 266 } 267 268 // Convert YUY2 to I422. 269 LIBYUV_API 270 int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2, 271 uint8* dst_y, int dst_stride_y, 272 uint8* dst_u, int dst_stride_u, 273 uint8* dst_v, int dst_stride_v, 274 int width, int height) { 275 int y; 276 void (*YUY2ToUV422Row)(const uint8* src_yuy2, 277 uint8* dst_u, uint8* dst_v, int pix) = 278 YUY2ToUV422Row_C; 279 void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) = 280 YUY2ToYRow_C; 281 // Negative height means invert the image. 282 if (height < 0) { 283 height = -height; 284 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; 285 src_stride_yuy2 = -src_stride_yuy2; 286 } 287 // Coalesce rows. 288 if (src_stride_yuy2 == width * 2 && 289 dst_stride_y == width && 290 dst_stride_u * 2 == width && 291 dst_stride_v * 2 == width) { 292 width *= height; 293 height = 1; 294 src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0; 295 } 296 #if defined(HAS_YUY2TOYROW_SSE2) 297 if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { 298 YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2; 299 YUY2ToYRow = YUY2ToYRow_Any_SSE2; 300 if (IS_ALIGNED(width, 16)) { 301 YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2; 302 YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2; 303 if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) { 304 YUY2ToUV422Row = YUY2ToUV422Row_SSE2; 305 if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { 306 YUY2ToYRow = YUY2ToYRow_SSE2; 307 } 308 } 309 } 310 } 311 #endif 312 #if defined(HAS_YUY2TOYROW_AVX2) 313 if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { 314 YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2; 315 YUY2ToYRow = YUY2ToYRow_Any_AVX2; 316 if (IS_ALIGNED(width, 32)) { 317 YUY2ToUV422Row = YUY2ToUV422Row_AVX2; 318 YUY2ToYRow = YUY2ToYRow_AVX2; 319 } 320 } 321 #endif 322 #if defined(HAS_YUY2TOYROW_NEON) 323 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 324 YUY2ToYRow = YUY2ToYRow_Any_NEON; 325 if (width >= 16) { 326 YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON; 327 } 328 if (IS_ALIGNED(width, 16)) { 329 YUY2ToYRow = YUY2ToYRow_NEON; 330 YUY2ToUV422Row = YUY2ToUV422Row_NEON; 331 } 332 } 333 #endif 334 335 for (y = 0; y < height; ++y) { 336 YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width); 337 YUY2ToYRow(src_yuy2, dst_y, width); 338 src_yuy2 += src_stride_yuy2; 339 dst_y += dst_stride_y; 340 dst_u += dst_stride_u; 341 dst_v += dst_stride_v; 342 } 343 return 0; 344 } 345 346 // Convert UYVY to I422. 347 LIBYUV_API 348 int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy, 349 uint8* dst_y, int dst_stride_y, 350 uint8* dst_u, int dst_stride_u, 351 uint8* dst_v, int dst_stride_v, 352 int width, int height) { 353 int y; 354 void (*UYVYToUV422Row)(const uint8* src_uyvy, 355 uint8* dst_u, uint8* dst_v, int pix) = 356 UYVYToUV422Row_C; 357 void (*UYVYToYRow)(const uint8* src_uyvy, 358 uint8* dst_y, int pix) = UYVYToYRow_C; 359 // Negative height means invert the image. 360 if (height < 0) { 361 height = -height; 362 src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; 363 src_stride_uyvy = -src_stride_uyvy; 364 } 365 // Coalesce rows. 366 if (src_stride_uyvy == width * 2 && 367 dst_stride_y == width && 368 dst_stride_u * 2 == width && 369 dst_stride_v * 2 == width) { 370 width *= height; 371 height = 1; 372 src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0; 373 } 374 #if defined(HAS_UYVYTOYROW_SSE2) 375 if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { 376 UYVYToUV422Row = UYVYToUV422Row_Any_SSE2; 377 UYVYToYRow = UYVYToYRow_Any_SSE2; 378 if (IS_ALIGNED(width, 16)) { 379 UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2; 380 UYVYToYRow = UYVYToYRow_Unaligned_SSE2; 381 if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) { 382 UYVYToUV422Row = UYVYToUV422Row_SSE2; 383 if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { 384 UYVYToYRow = UYVYToYRow_SSE2; 385 } 386 } 387 } 388 } 389 #endif 390 #if defined(HAS_UYVYTOYROW_AVX2) 391 if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { 392 UYVYToUV422Row = UYVYToUV422Row_Any_AVX2; 393 UYVYToYRow = UYVYToYRow_Any_AVX2; 394 if (IS_ALIGNED(width, 32)) { 395 UYVYToUV422Row = UYVYToUV422Row_AVX2; 396 UYVYToYRow = UYVYToYRow_AVX2; 397 } 398 } 399 #endif 400 #if defined(HAS_UYVYTOYROW_NEON) 401 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 402 UYVYToYRow = UYVYToYRow_Any_NEON; 403 if (width >= 16) { 404 UYVYToUV422Row = UYVYToUV422Row_Any_NEON; 405 } 406 if (IS_ALIGNED(width, 16)) { 407 UYVYToYRow = UYVYToYRow_NEON; 408 UYVYToUV422Row = UYVYToUV422Row_NEON; 409 } 410 } 411 #endif 412 413 for (y = 0; y < height; ++y) { 414 UYVYToUV422Row(src_uyvy, dst_u, dst_v, width); 415 UYVYToYRow(src_uyvy, dst_y, width); 416 src_uyvy += src_stride_uyvy; 417 dst_y += dst_stride_y; 418 dst_u += dst_stride_u; 419 dst_v += dst_stride_v; 420 } 421 return 0; 422 } 423 424 // Mirror I400 with optional flipping 425 LIBYUV_API 426 int I400Mirror(const uint8* src_y, int src_stride_y, 427 uint8* dst_y, int dst_stride_y, 428 int width, int height) { 429 if (!src_y || !dst_y || 430 width <= 0 || height == 0) { 431 return -1; 432 } 433 // Negative height means invert the image. 434 if (height < 0) { 435 height = -height; 436 src_y = src_y + (height - 1) * src_stride_y; 437 src_stride_y = -src_stride_y; 438 } 439 440 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); 441 return 0; 442 } 443 444 // Mirror I420 with optional flipping 445 LIBYUV_API 446 int I420Mirror(const uint8* src_y, int src_stride_y, 447 const uint8* src_u, int src_stride_u, 448 const uint8* src_v, int src_stride_v, 449 uint8* dst_y, int dst_stride_y, 450 uint8* dst_u, int dst_stride_u, 451 uint8* dst_v, int dst_stride_v, 452 int width, int height) { 453 int halfwidth = (width + 1) >> 1; 454 int halfheight = (height + 1) >> 1; 455 if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v || 456 width <= 0 || height == 0) { 457 return -1; 458 } 459 // Negative height means invert the image. 460 if (height < 0) { 461 height = -height; 462 halfheight = (height + 1) >> 1; 463 src_y = src_y + (height - 1) * src_stride_y; 464 src_u = src_u + (halfheight - 1) * src_stride_u; 465 src_v = src_v + (halfheight - 1) * src_stride_v; 466 src_stride_y = -src_stride_y; 467 src_stride_u = -src_stride_u; 468 src_stride_v = -src_stride_v; 469 } 470 471 if (dst_y) { 472 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); 473 } 474 MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); 475 MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); 476 return 0; 477 } 478 479 // ARGB mirror. 480 LIBYUV_API 481 int ARGBMirror(const uint8* src_argb, int src_stride_argb, 482 uint8* dst_argb, int dst_stride_argb, 483 int width, int height) { 484 int y; 485 void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) = 486 ARGBMirrorRow_C; 487 if (!src_argb || !dst_argb || width <= 0 || height == 0) { 488 return -1; 489 } 490 // Negative height means invert the image. 491 if (height < 0) { 492 height = -height; 493 src_argb = src_argb + (height - 1) * src_stride_argb; 494 src_stride_argb = -src_stride_argb; 495 } 496 497 #if defined(HAS_ARGBMIRRORROW_SSSE3) 498 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) && 499 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && 500 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 501 ARGBMirrorRow = ARGBMirrorRow_SSSE3; 502 } 503 #endif 504 #if defined(HAS_ARGBMIRRORROW_AVX2) 505 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) { 506 ARGBMirrorRow = ARGBMirrorRow_AVX2; 507 } 508 #endif 509 #if defined(HAS_ARGBMIRRORROW_NEON) 510 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) { 511 ARGBMirrorRow = ARGBMirrorRow_NEON; 512 } 513 #endif 514 515 // Mirror plane 516 for (y = 0; y < height; ++y) { 517 ARGBMirrorRow(src_argb, dst_argb, width); 518 src_argb += src_stride_argb; 519 dst_argb += dst_stride_argb; 520 } 521 return 0; 522 } 523 524 // Get a blender that optimized for the CPU, alignment and pixel count. 525 // As there are 6 blenders to choose from, the caller should try to use 526 // the same blend function for all pixels if possible. 527 LIBYUV_API 528 ARGBBlendRow GetARGBBlend() { 529 void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1, 530 uint8* dst_argb, int width) = ARGBBlendRow_C; 531 #if defined(HAS_ARGBBLENDROW_SSSE3) 532 if (TestCpuFlag(kCpuHasSSSE3)) { 533 ARGBBlendRow = ARGBBlendRow_SSSE3; 534 return ARGBBlendRow; 535 } 536 #endif 537 #if defined(HAS_ARGBBLENDROW_SSE2) 538 if (TestCpuFlag(kCpuHasSSE2)) { 539 ARGBBlendRow = ARGBBlendRow_SSE2; 540 } 541 #endif 542 #if defined(HAS_ARGBBLENDROW_NEON) 543 if (TestCpuFlag(kCpuHasNEON)) { 544 ARGBBlendRow = ARGBBlendRow_NEON; 545 } 546 #endif 547 return ARGBBlendRow; 548 } 549 550 // Alpha Blend 2 ARGB images and store to destination. 551 LIBYUV_API 552 int ARGBBlend(const uint8* src_argb0, int src_stride_argb0, 553 const uint8* src_argb1, int src_stride_argb1, 554 uint8* dst_argb, int dst_stride_argb, 555 int width, int height) { 556 int y; 557 void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1, 558 uint8* dst_argb, int width) = GetARGBBlend(); 559 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { 560 return -1; 561 } 562 // Negative height means invert the image. 563 if (height < 0) { 564 height = -height; 565 dst_argb = dst_argb + (height - 1) * dst_stride_argb; 566 dst_stride_argb = -dst_stride_argb; 567 } 568 // Coalesce rows. 569 if (src_stride_argb0 == width * 4 && 570 src_stride_argb1 == width * 4 && 571 dst_stride_argb == width * 4) { 572 width *= height; 573 height = 1; 574 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; 575 } 576 577 for (y = 0; y < height; ++y) { 578 ARGBBlendRow(src_argb0, src_argb1, dst_argb, width); 579 src_argb0 += src_stride_argb0; 580 src_argb1 += src_stride_argb1; 581 dst_argb += dst_stride_argb; 582 } 583 return 0; 584 } 585 586 // Multiply 2 ARGB images and store to destination. 587 LIBYUV_API 588 int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0, 589 const uint8* src_argb1, int src_stride_argb1, 590 uint8* dst_argb, int dst_stride_argb, 591 int width, int height) { 592 int y; 593 void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst, 594 int width) = ARGBMultiplyRow_C; 595 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { 596 return -1; 597 } 598 // Negative height means invert the image. 599 if (height < 0) { 600 height = -height; 601 dst_argb = dst_argb + (height - 1) * dst_stride_argb; 602 dst_stride_argb = -dst_stride_argb; 603 } 604 // Coalesce rows. 605 if (src_stride_argb0 == width * 4 && 606 src_stride_argb1 == width * 4 && 607 dst_stride_argb == width * 4) { 608 width *= height; 609 height = 1; 610 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; 611 } 612 #if defined(HAS_ARGBMULTIPLYROW_SSE2) 613 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { 614 ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2; 615 if (IS_ALIGNED(width, 4)) { 616 ARGBMultiplyRow = ARGBMultiplyRow_SSE2; 617 } 618 } 619 #endif 620 #if defined(HAS_ARGBMULTIPLYROW_AVX2) 621 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { 622 ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2; 623 if (IS_ALIGNED(width, 8)) { 624 ARGBMultiplyRow = ARGBMultiplyRow_AVX2; 625 } 626 } 627 #endif 628 #if defined(HAS_ARGBMULTIPLYROW_NEON) 629 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 630 ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON; 631 if (IS_ALIGNED(width, 8)) { 632 ARGBMultiplyRow = ARGBMultiplyRow_NEON; 633 } 634 } 635 #endif 636 637 // Multiply plane 638 for (y = 0; y < height; ++y) { 639 ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width); 640 src_argb0 += src_stride_argb0; 641 src_argb1 += src_stride_argb1; 642 dst_argb += dst_stride_argb; 643 } 644 return 0; 645 } 646 647 // Add 2 ARGB images and store to destination. 648 LIBYUV_API 649 int ARGBAdd(const uint8* src_argb0, int src_stride_argb0, 650 const uint8* src_argb1, int src_stride_argb1, 651 uint8* dst_argb, int dst_stride_argb, 652 int width, int height) { 653 int y; 654 void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst, 655 int width) = ARGBAddRow_C; 656 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { 657 return -1; 658 } 659 // Negative height means invert the image. 660 if (height < 0) { 661 height = -height; 662 dst_argb = dst_argb + (height - 1) * dst_stride_argb; 663 dst_stride_argb = -dst_stride_argb; 664 } 665 // Coalesce rows. 666 if (src_stride_argb0 == width * 4 && 667 src_stride_argb1 == width * 4 && 668 dst_stride_argb == width * 4) { 669 width *= height; 670 height = 1; 671 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; 672 } 673 #if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER) 674 if (TestCpuFlag(kCpuHasSSE2)) { 675 ARGBAddRow = ARGBAddRow_SSE2; 676 } 677 #endif 678 #if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER) 679 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { 680 ARGBAddRow = ARGBAddRow_Any_SSE2; 681 if (IS_ALIGNED(width, 4)) { 682 ARGBAddRow = ARGBAddRow_SSE2; 683 } 684 } 685 #endif 686 #if defined(HAS_ARGBADDROW_AVX2) 687 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { 688 ARGBAddRow = ARGBAddRow_Any_AVX2; 689 if (IS_ALIGNED(width, 8)) { 690 ARGBAddRow = ARGBAddRow_AVX2; 691 } 692 } 693 #endif 694 #if defined(HAS_ARGBADDROW_NEON) 695 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 696 ARGBAddRow = ARGBAddRow_Any_NEON; 697 if (IS_ALIGNED(width, 8)) { 698 ARGBAddRow = ARGBAddRow_NEON; 699 } 700 } 701 #endif 702 703 // Add plane 704 for (y = 0; y < height; ++y) { 705 ARGBAddRow(src_argb0, src_argb1, dst_argb, width); 706 src_argb0 += src_stride_argb0; 707 src_argb1 += src_stride_argb1; 708 dst_argb += dst_stride_argb; 709 } 710 return 0; 711 } 712 713 // Subtract 2 ARGB images and store to destination. 714 LIBYUV_API 715 int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0, 716 const uint8* src_argb1, int src_stride_argb1, 717 uint8* dst_argb, int dst_stride_argb, 718 int width, int height) { 719 int y; 720 void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst, 721 int width) = ARGBSubtractRow_C; 722 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { 723 return -1; 724 } 725 // Negative height means invert the image. 726 if (height < 0) { 727 height = -height; 728 dst_argb = dst_argb + (height - 1) * dst_stride_argb; 729 dst_stride_argb = -dst_stride_argb; 730 } 731 // Coalesce rows. 732 if (src_stride_argb0 == width * 4 && 733 src_stride_argb1 == width * 4 && 734 dst_stride_argb == width * 4) { 735 width *= height; 736 height = 1; 737 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; 738 } 739 #if defined(HAS_ARGBSUBTRACTROW_SSE2) 740 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { 741 ARGBSubtractRow = ARGBSubtractRow_Any_SSE2; 742 if (IS_ALIGNED(width, 4)) { 743 ARGBSubtractRow = ARGBSubtractRow_SSE2; 744 } 745 } 746 #endif 747 #if defined(HAS_ARGBSUBTRACTROW_AVX2) 748 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { 749 ARGBSubtractRow = ARGBSubtractRow_Any_AVX2; 750 if (IS_ALIGNED(width, 8)) { 751 ARGBSubtractRow = ARGBSubtractRow_AVX2; 752 } 753 } 754 #endif 755 #if defined(HAS_ARGBSUBTRACTROW_NEON) 756 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 757 ARGBSubtractRow = ARGBSubtractRow_Any_NEON; 758 if (IS_ALIGNED(width, 8)) { 759 ARGBSubtractRow = ARGBSubtractRow_NEON; 760 } 761 } 762 #endif 763 764 // Subtract plane 765 for (y = 0; y < height; ++y) { 766 ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width); 767 src_argb0 += src_stride_argb0; 768 src_argb1 += src_stride_argb1; 769 dst_argb += dst_stride_argb; 770 } 771 return 0; 772 } 773 774 // Convert I422 to BGRA. 775 LIBYUV_API 776 int I422ToBGRA(const uint8* src_y, int src_stride_y, 777 const uint8* src_u, int src_stride_u, 778 const uint8* src_v, int src_stride_v, 779 uint8* dst_bgra, int dst_stride_bgra, 780 int width, int height) { 781 int y; 782 void (*I422ToBGRARow)(const uint8* y_buf, 783 const uint8* u_buf, 784 const uint8* v_buf, 785 uint8* rgb_buf, 786 int width) = I422ToBGRARow_C; 787 if (!src_y || !src_u || !src_v || 788 !dst_bgra || 789 width <= 0 || height == 0) { 790 return -1; 791 } 792 // Negative height means invert the image. 793 if (height < 0) { 794 height = -height; 795 dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra; 796 dst_stride_bgra = -dst_stride_bgra; 797 } 798 // Coalesce rows. 799 if (src_stride_y == width && 800 src_stride_u * 2 == width && 801 src_stride_v * 2 == width && 802 dst_stride_bgra == width * 4) { 803 width *= height; 804 height = 1; 805 src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0; 806 } 807 #if defined(HAS_I422TOBGRAROW_NEON) 808 if (TestCpuFlag(kCpuHasNEON)) { 809 I422ToBGRARow = I422ToBGRARow_Any_NEON; 810 if (IS_ALIGNED(width, 16)) { 811 I422ToBGRARow = I422ToBGRARow_NEON; 812 } 813 } 814 #elif defined(HAS_I422TOBGRAROW_SSSE3) 815 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { 816 I422ToBGRARow = I422ToBGRARow_Any_SSSE3; 817 if (IS_ALIGNED(width, 8)) { 818 I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3; 819 if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) { 820 I422ToBGRARow = I422ToBGRARow_SSSE3; 821 } 822 } 823 } 824 #elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2) 825 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && 826 IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && 827 IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && 828 IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && 829 IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) { 830 I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2; 831 } 832 #endif 833 834 for (y = 0; y < height; ++y) { 835 I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width); 836 dst_bgra += dst_stride_bgra; 837 src_y += src_stride_y; 838 src_u += src_stride_u; 839 src_v += src_stride_v; 840 } 841 return 0; 842 } 843 844 // Convert I422 to ABGR. 845 LIBYUV_API 846 int I422ToABGR(const uint8* src_y, int src_stride_y, 847 const uint8* src_u, int src_stride_u, 848 const uint8* src_v, int src_stride_v, 849 uint8* dst_abgr, int dst_stride_abgr, 850 int width, int height) { 851 int y; 852 void (*I422ToABGRRow)(const uint8* y_buf, 853 const uint8* u_buf, 854 const uint8* v_buf, 855 uint8* rgb_buf, 856 int width) = I422ToABGRRow_C; 857 if (!src_y || !src_u || !src_v || 858 !dst_abgr || 859 width <= 0 || height == 0) { 860 return -1; 861 } 862 // Negative height means invert the image. 863 if (height < 0) { 864 height = -height; 865 dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr; 866 dst_stride_abgr = -dst_stride_abgr; 867 } 868 // Coalesce rows. 869 if (src_stride_y == width && 870 src_stride_u * 2 == width && 871 src_stride_v * 2 == width && 872 dst_stride_abgr == width * 4) { 873 width *= height; 874 height = 1; 875 src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0; 876 } 877 #if defined(HAS_I422TOABGRROW_NEON) 878 if (TestCpuFlag(kCpuHasNEON)) { 879 I422ToABGRRow = I422ToABGRRow_Any_NEON; 880 if (IS_ALIGNED(width, 16)) { 881 I422ToABGRRow = I422ToABGRRow_NEON; 882 } 883 } 884 #elif defined(HAS_I422TOABGRROW_SSSE3) 885 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { 886 I422ToABGRRow = I422ToABGRRow_Any_SSSE3; 887 if (IS_ALIGNED(width, 8)) { 888 I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3; 889 if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) { 890 I422ToABGRRow = I422ToABGRRow_SSSE3; 891 } 892 } 893 } 894 #endif 895 896 for (y = 0; y < height; ++y) { 897 I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); 898 dst_abgr += dst_stride_abgr; 899 src_y += src_stride_y; 900 src_u += src_stride_u; 901 src_v += src_stride_v; 902 } 903 return 0; 904 } 905 906 // Convert I422 to RGBA. 907 LIBYUV_API 908 int I422ToRGBA(const uint8* src_y, int src_stride_y, 909 const uint8* src_u, int src_stride_u, 910 const uint8* src_v, int src_stride_v, 911 uint8* dst_rgba, int dst_stride_rgba, 912 int width, int height) { 913 int y; 914 void (*I422ToRGBARow)(const uint8* y_buf, 915 const uint8* u_buf, 916 const uint8* v_buf, 917 uint8* rgb_buf, 918 int width) = I422ToRGBARow_C; 919 if (!src_y || !src_u || !src_v || 920 !dst_rgba || 921 width <= 0 || height == 0) { 922 return -1; 923 } 924 // Negative height means invert the image. 925 if (height < 0) { 926 height = -height; 927 dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba; 928 dst_stride_rgba = -dst_stride_rgba; 929 } 930 // Coalesce rows. 931 if (src_stride_y == width && 932 src_stride_u * 2 == width && 933 src_stride_v * 2 == width && 934 dst_stride_rgba == width * 4) { 935 width *= height; 936 height = 1; 937 src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0; 938 } 939 #if defined(HAS_I422TORGBAROW_NEON) 940 if (TestCpuFlag(kCpuHasNEON)) { 941 I422ToRGBARow = I422ToRGBARow_Any_NEON; 942 if (IS_ALIGNED(width, 16)) { 943 I422ToRGBARow = I422ToRGBARow_NEON; 944 } 945 } 946 #elif defined(HAS_I422TORGBAROW_SSSE3) 947 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { 948 I422ToRGBARow = I422ToRGBARow_Any_SSSE3; 949 if (IS_ALIGNED(width, 8)) { 950 I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3; 951 if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) { 952 I422ToRGBARow = I422ToRGBARow_SSSE3; 953 } 954 } 955 } 956 #endif 957 958 for (y = 0; y < height; ++y) { 959 I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width); 960 dst_rgba += dst_stride_rgba; 961 src_y += src_stride_y; 962 src_u += src_stride_u; 963 src_v += src_stride_v; 964 } 965 return 0; 966 } 967 968 // Convert NV12 to RGB565. 969 LIBYUV_API 970 int NV12ToRGB565(const uint8* src_y, int src_stride_y, 971 const uint8* src_uv, int src_stride_uv, 972 uint8* dst_rgb565, int dst_stride_rgb565, 973 int width, int height) { 974 int y; 975 void (*NV12ToRGB565Row)(const uint8* y_buf, 976 const uint8* uv_buf, 977 uint8* rgb_buf, 978 int width) = NV12ToRGB565Row_C; 979 if (!src_y || !src_uv || !dst_rgb565 || 980 width <= 0 || height == 0) { 981 return -1; 982 } 983 // Negative height means invert the image. 984 if (height < 0) { 985 height = -height; 986 dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; 987 dst_stride_rgb565 = -dst_stride_rgb565; 988 } 989 #if defined(HAS_NV12TORGB565ROW_SSSE3) 990 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { 991 NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3; 992 if (IS_ALIGNED(width, 8)) { 993 NV12ToRGB565Row = NV12ToRGB565Row_SSSE3; 994 } 995 } 996 #elif defined(HAS_NV12TORGB565ROW_NEON) 997 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 998 NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON; 999 if (IS_ALIGNED(width, 8)) { 1000 NV12ToRGB565Row = NV12ToRGB565Row_NEON; 1001 } 1002 } 1003 #endif 1004 1005 for (y = 0; y < height; ++y) { 1006 NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width); 1007 dst_rgb565 += dst_stride_rgb565; 1008 src_y += src_stride_y; 1009 if (y & 1) { 1010 src_uv += src_stride_uv; 1011 } 1012 } 1013 return 0; 1014 } 1015 1016 // Convert NV21 to RGB565. 1017 LIBYUV_API 1018 int NV21ToRGB565(const uint8* src_y, int src_stride_y, 1019 const uint8* src_vu, int src_stride_vu, 1020 uint8* dst_rgb565, int dst_stride_rgb565, 1021 int width, int height) { 1022 int y; 1023 void (*NV21ToRGB565Row)(const uint8* y_buf, 1024 const uint8* src_vu, 1025 uint8* rgb_buf, 1026 int width) = NV21ToRGB565Row_C; 1027 if (!src_y || !src_vu || !dst_rgb565 || 1028 width <= 0 || height == 0) { 1029 return -1; 1030 } 1031 // Negative height means invert the image. 1032 if (height < 0) { 1033 height = -height; 1034 dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; 1035 dst_stride_rgb565 = -dst_stride_rgb565; 1036 } 1037 #if defined(HAS_NV21TORGB565ROW_SSSE3) 1038 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { 1039 NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3; 1040 if (IS_ALIGNED(width, 8)) { 1041 NV21ToRGB565Row = NV21ToRGB565Row_SSSE3; 1042 } 1043 } 1044 #elif defined(HAS_NV21TORGB565ROW_NEON) 1045 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 1046 NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON; 1047 if (IS_ALIGNED(width, 8)) { 1048 NV21ToRGB565Row = NV21ToRGB565Row_NEON; 1049 } 1050 } 1051 #endif 1052 1053 for (y = 0; y < height; ++y) { 1054 NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width); 1055 dst_rgb565 += dst_stride_rgb565; 1056 src_y += src_stride_y; 1057 if (y & 1) { 1058 src_vu += src_stride_vu; 1059 } 1060 } 1061 return 0; 1062 } 1063 1064 LIBYUV_API 1065 void SetPlane(uint8* dst_y, int dst_stride_y, 1066 int width, int height, 1067 uint32 value) { 1068 int y; 1069 uint32 v32 = value | (value << 8) | (value << 16) | (value << 24); 1070 void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C; 1071 // Coalesce rows. 1072 if (dst_stride_y == width) { 1073 width *= height; 1074 height = 1; 1075 dst_stride_y = 0; 1076 } 1077 #if defined(HAS_SETROW_NEON) 1078 if (TestCpuFlag(kCpuHasNEON) && 1079 IS_ALIGNED(width, 16) && 1080 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { 1081 SetRow = SetRow_NEON; 1082 } 1083 #endif 1084 #if defined(HAS_SETROW_X86) 1085 if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { 1086 SetRow = SetRow_X86; 1087 } 1088 #endif 1089 1090 // Set plane 1091 for (y = 0; y < height; ++y) { 1092 SetRow(dst_y, v32, width); 1093 dst_y += dst_stride_y; 1094 } 1095 } 1096 1097 // Draw a rectangle into I420 1098 LIBYUV_API 1099 int I420Rect(uint8* dst_y, int dst_stride_y, 1100 uint8* dst_u, int dst_stride_u, 1101 uint8* dst_v, int dst_stride_v, 1102 int x, int y, 1103 int width, int height, 1104 int value_y, int value_u, int value_v) { 1105 int halfwidth = (width + 1) >> 1; 1106 int halfheight = (height + 1) >> 1; 1107 uint8* start_y = dst_y + y * dst_stride_y + x; 1108 uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2); 1109 uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2); 1110 if (!dst_y || !dst_u || !dst_v || 1111 width <= 0 || height <= 0 || 1112 x < 0 || y < 0 || 1113 value_y < 0 || value_y > 255 || 1114 value_u < 0 || value_u > 255 || 1115 value_v < 0 || value_v > 255) { 1116 return -1; 1117 } 1118 1119 SetPlane(start_y, dst_stride_y, width, height, value_y); 1120 SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u); 1121 SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v); 1122 return 0; 1123 } 1124 1125 // Draw a rectangle into ARGB 1126 LIBYUV_API 1127 int ARGBRect(uint8* dst_argb, int dst_stride_argb, 1128 int dst_x, int dst_y, 1129 int width, int height, 1130 uint32 value) { 1131 if (!dst_argb || 1132 width <= 0 || height <= 0 || 1133 dst_x < 0 || dst_y < 0) { 1134 return -1; 1135 } 1136 dst_argb += dst_y * dst_stride_argb + dst_x * 4; 1137 // Coalesce rows. 1138 if (dst_stride_argb == width * 4) { 1139 width *= height; 1140 height = 1; 1141 dst_stride_argb = 0; 1142 } 1143 #if defined(HAS_SETROW_NEON) 1144 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) && 1145 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1146 ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height); 1147 return 0; 1148 } 1149 #endif 1150 #if defined(HAS_SETROW_X86) 1151 if (TestCpuFlag(kCpuHasX86)) { 1152 ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height); 1153 return 0; 1154 } 1155 #endif 1156 ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height); 1157 return 0; 1158 } 1159 1160 // Convert unattentuated ARGB to preattenuated ARGB. 1161 // An unattenutated ARGB alpha blend uses the formula 1162 // p = a * f + (1 - a) * b 1163 // where 1164 // p is output pixel 1165 // f is foreground pixel 1166 // b is background pixel 1167 // a is alpha value from foreground pixel 1168 // An preattenutated ARGB alpha blend uses the formula 1169 // p = f + (1 - a) * b 1170 // where 1171 // f is foreground pixel premultiplied by alpha 1172 1173 LIBYUV_API 1174 int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, 1175 uint8* dst_argb, int dst_stride_argb, 1176 int width, int height) { 1177 int y; 1178 void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb, 1179 int width) = ARGBAttenuateRow_C; 1180 if (!src_argb || !dst_argb || width <= 0 || height == 0) { 1181 return -1; 1182 } 1183 if (height < 0) { 1184 height = -height; 1185 src_argb = src_argb + (height - 1) * src_stride_argb; 1186 src_stride_argb = -src_stride_argb; 1187 } 1188 // Coalesce rows. 1189 if (src_stride_argb == width * 4 && 1190 dst_stride_argb == width * 4) { 1191 width *= height; 1192 height = 1; 1193 src_stride_argb = dst_stride_argb = 0; 1194 } 1195 #if defined(HAS_ARGBATTENUATEROW_SSE2) 1196 if (TestCpuFlag(kCpuHasSSE2) && width >= 4 && 1197 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && 1198 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1199 ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2; 1200 if (IS_ALIGNED(width, 4)) { 1201 ARGBAttenuateRow = ARGBAttenuateRow_SSE2; 1202 } 1203 } 1204 #endif 1205 #if defined(HAS_ARGBATTENUATEROW_SSSE3) 1206 if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) { 1207 ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; 1208 if (IS_ALIGNED(width, 4)) { 1209 ARGBAttenuateRow = ARGBAttenuateRow_SSSE3; 1210 } 1211 } 1212 #endif 1213 #if defined(HAS_ARGBATTENUATEROW_AVX2) 1214 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { 1215 ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2; 1216 if (IS_ALIGNED(width, 8)) { 1217 ARGBAttenuateRow = ARGBAttenuateRow_AVX2; 1218 } 1219 } 1220 #endif 1221 #if defined(HAS_ARGBATTENUATEROW_NEON) 1222 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 1223 ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON; 1224 if (IS_ALIGNED(width, 8)) { 1225 ARGBAttenuateRow = ARGBAttenuateRow_NEON; 1226 } 1227 } 1228 #endif 1229 1230 for (y = 0; y < height; ++y) { 1231 ARGBAttenuateRow(src_argb, dst_argb, width); 1232 src_argb += src_stride_argb; 1233 dst_argb += dst_stride_argb; 1234 } 1235 return 0; 1236 } 1237 1238 // Convert preattentuated ARGB to unattenuated ARGB. 1239 LIBYUV_API 1240 int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, 1241 uint8* dst_argb, int dst_stride_argb, 1242 int width, int height) { 1243 int y; 1244 void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb, 1245 int width) = ARGBUnattenuateRow_C; 1246 if (!src_argb || !dst_argb || width <= 0 || height == 0) { 1247 return -1; 1248 } 1249 if (height < 0) { 1250 height = -height; 1251 src_argb = src_argb + (height - 1) * src_stride_argb; 1252 src_stride_argb = -src_stride_argb; 1253 } 1254 // Coalesce rows. 1255 if (src_stride_argb == width * 4 && 1256 dst_stride_argb == width * 4) { 1257 width *= height; 1258 height = 1; 1259 src_stride_argb = dst_stride_argb = 0; 1260 } 1261 #if defined(HAS_ARGBUNATTENUATEROW_SSE2) 1262 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { 1263 ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2; 1264 if (IS_ALIGNED(width, 4)) { 1265 ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2; 1266 } 1267 } 1268 #endif 1269 #if defined(HAS_ARGBUNATTENUATEROW_AVX2) 1270 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { 1271 ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2; 1272 if (IS_ALIGNED(width, 8)) { 1273 ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2; 1274 } 1275 } 1276 #endif 1277 // TODO(fbarchard): Neon version. 1278 1279 for (y = 0; y < height; ++y) { 1280 ARGBUnattenuateRow(src_argb, dst_argb, width); 1281 src_argb += src_stride_argb; 1282 dst_argb += dst_stride_argb; 1283 } 1284 return 0; 1285 } 1286 1287 // Convert ARGB to Grayed ARGB. 1288 LIBYUV_API 1289 int ARGBGrayTo(const uint8* src_argb, int src_stride_argb, 1290 uint8* dst_argb, int dst_stride_argb, 1291 int width, int height) { 1292 int y; 1293 void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, 1294 int width) = ARGBGrayRow_C; 1295 if (!src_argb || !dst_argb || width <= 0 || height == 0) { 1296 return -1; 1297 } 1298 if (height < 0) { 1299 height = -height; 1300 src_argb = src_argb + (height - 1) * src_stride_argb; 1301 src_stride_argb = -src_stride_argb; 1302 } 1303 // Coalesce rows. 1304 if (src_stride_argb == width * 4 && 1305 dst_stride_argb == width * 4) { 1306 width *= height; 1307 height = 1; 1308 src_stride_argb = dst_stride_argb = 0; 1309 } 1310 #if defined(HAS_ARGBGRAYROW_SSSE3) 1311 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && 1312 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && 1313 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1314 ARGBGrayRow = ARGBGrayRow_SSSE3; 1315 } 1316 #elif defined(HAS_ARGBGRAYROW_NEON) 1317 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1318 ARGBGrayRow = ARGBGrayRow_NEON; 1319 } 1320 #endif 1321 1322 for (y = 0; y < height; ++y) { 1323 ARGBGrayRow(src_argb, dst_argb, width); 1324 src_argb += src_stride_argb; 1325 dst_argb += dst_stride_argb; 1326 } 1327 return 0; 1328 } 1329 1330 // Make a rectangle of ARGB gray scale. 1331 LIBYUV_API 1332 int ARGBGray(uint8* dst_argb, int dst_stride_argb, 1333 int dst_x, int dst_y, 1334 int width, int height) { 1335 int y; 1336 void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, 1337 int width) = ARGBGrayRow_C; 1338 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1339 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { 1340 return -1; 1341 } 1342 // Coalesce rows. 1343 if (dst_stride_argb == width * 4) { 1344 width *= height; 1345 height = 1; 1346 dst_stride_argb = 0; 1347 } 1348 #if defined(HAS_ARGBGRAYROW_SSSE3) 1349 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && 1350 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1351 ARGBGrayRow = ARGBGrayRow_SSSE3; 1352 } 1353 #elif defined(HAS_ARGBGRAYROW_NEON) 1354 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1355 ARGBGrayRow = ARGBGrayRow_NEON; 1356 } 1357 #endif 1358 for (y = 0; y < height; ++y) { 1359 ARGBGrayRow(dst, dst, width); 1360 dst += dst_stride_argb; 1361 } 1362 return 0; 1363 } 1364 1365 // Make a rectangle of ARGB Sepia tone. 1366 LIBYUV_API 1367 int ARGBSepia(uint8* dst_argb, int dst_stride_argb, 1368 int dst_x, int dst_y, int width, int height) { 1369 int y; 1370 void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C; 1371 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1372 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { 1373 return -1; 1374 } 1375 // Coalesce rows. 1376 if (dst_stride_argb == width * 4) { 1377 width *= height; 1378 height = 1; 1379 dst_stride_argb = 0; 1380 } 1381 #if defined(HAS_ARGBSEPIAROW_SSSE3) 1382 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && 1383 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1384 ARGBSepiaRow = ARGBSepiaRow_SSSE3; 1385 } 1386 #elif defined(HAS_ARGBSEPIAROW_NEON) 1387 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1388 ARGBSepiaRow = ARGBSepiaRow_NEON; 1389 } 1390 #endif 1391 for (y = 0; y < height; ++y) { 1392 ARGBSepiaRow(dst, width); 1393 dst += dst_stride_argb; 1394 } 1395 return 0; 1396 } 1397 1398 // Apply a 4x4 matrix to each ARGB pixel. 1399 // Note: Normally for shading, but can be used to swizzle or invert. 1400 LIBYUV_API 1401 int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb, 1402 uint8* dst_argb, int dst_stride_argb, 1403 const int8* matrix_argb, 1404 int width, int height) { 1405 int y; 1406 void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb, 1407 const int8* matrix_argb, int width) = ARGBColorMatrixRow_C; 1408 if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) { 1409 return -1; 1410 } 1411 if (height < 0) { 1412 height = -height; 1413 src_argb = src_argb + (height - 1) * src_stride_argb; 1414 src_stride_argb = -src_stride_argb; 1415 } 1416 // Coalesce rows. 1417 if (src_stride_argb == width * 4 && 1418 dst_stride_argb == width * 4) { 1419 width *= height; 1420 height = 1; 1421 src_stride_argb = dst_stride_argb = 0; 1422 } 1423 #if defined(HAS_ARGBCOLORMATRIXROW_SSSE3) 1424 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && 1425 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1426 ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3; 1427 } 1428 #elif defined(HAS_ARGBCOLORMATRIXROW_NEON) 1429 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1430 ARGBColorMatrixRow = ARGBColorMatrixRow_NEON; 1431 } 1432 #endif 1433 for (y = 0; y < height; ++y) { 1434 ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width); 1435 src_argb += src_stride_argb; 1436 dst_argb += dst_stride_argb; 1437 } 1438 return 0; 1439 } 1440 1441 // Apply a 4x3 matrix to each ARGB pixel. 1442 // Deprecated. 1443 LIBYUV_API 1444 int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb, 1445 const int8* matrix_rgb, 1446 int dst_x, int dst_y, int width, int height) { 1447 SIMD_ALIGNED(int8 matrix_argb[16]); 1448 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1449 if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || 1450 dst_x < 0 || dst_y < 0) { 1451 return -1; 1452 } 1453 1454 // Convert 4x3 7 bit matrix to 4x4 6 bit matrix. 1455 matrix_argb[0] = matrix_rgb[0] / 2; 1456 matrix_argb[1] = matrix_rgb[1] / 2; 1457 matrix_argb[2] = matrix_rgb[2] / 2; 1458 matrix_argb[3] = matrix_rgb[3] / 2; 1459 matrix_argb[4] = matrix_rgb[4] / 2; 1460 matrix_argb[5] = matrix_rgb[5] / 2; 1461 matrix_argb[6] = matrix_rgb[6] / 2; 1462 matrix_argb[7] = matrix_rgb[7] / 2; 1463 matrix_argb[8] = matrix_rgb[8] / 2; 1464 matrix_argb[9] = matrix_rgb[9] / 2; 1465 matrix_argb[10] = matrix_rgb[10] / 2; 1466 matrix_argb[11] = matrix_rgb[11] / 2; 1467 matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0; 1468 matrix_argb[15] = 64; // 1.0 1469 1470 return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb, 1471 dst, dst_stride_argb, 1472 &matrix_argb[0], width, height); 1473 } 1474 1475 // Apply a color table each ARGB pixel. 1476 // Table contains 256 ARGB values. 1477 LIBYUV_API 1478 int ARGBColorTable(uint8* dst_argb, int dst_stride_argb, 1479 const uint8* table_argb, 1480 int dst_x, int dst_y, int width, int height) { 1481 int y; 1482 void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb, 1483 int width) = ARGBColorTableRow_C; 1484 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1485 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || 1486 dst_x < 0 || dst_y < 0) { 1487 return -1; 1488 } 1489 // Coalesce rows. 1490 if (dst_stride_argb == width * 4) { 1491 width *= height; 1492 height = 1; 1493 dst_stride_argb = 0; 1494 } 1495 #if defined(HAS_ARGBCOLORTABLEROW_X86) 1496 if (TestCpuFlag(kCpuHasX86)) { 1497 ARGBColorTableRow = ARGBColorTableRow_X86; 1498 } 1499 #endif 1500 for (y = 0; y < height; ++y) { 1501 ARGBColorTableRow(dst, table_argb, width); 1502 dst += dst_stride_argb; 1503 } 1504 return 0; 1505 } 1506 1507 // Apply a color table each ARGB pixel but preserve destination alpha. 1508 // Table contains 256 ARGB values. 1509 LIBYUV_API 1510 int RGBColorTable(uint8* dst_argb, int dst_stride_argb, 1511 const uint8* table_argb, 1512 int dst_x, int dst_y, int width, int height) { 1513 int y; 1514 void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb, 1515 int width) = RGBColorTableRow_C; 1516 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1517 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || 1518 dst_x < 0 || dst_y < 0) { 1519 return -1; 1520 } 1521 // Coalesce rows. 1522 if (dst_stride_argb == width * 4) { 1523 width *= height; 1524 height = 1; 1525 dst_stride_argb = 0; 1526 } 1527 #if defined(HAS_RGBCOLORTABLEROW_X86) 1528 if (TestCpuFlag(kCpuHasX86)) { 1529 RGBColorTableRow = RGBColorTableRow_X86; 1530 } 1531 #endif 1532 for (y = 0; y < height; ++y) { 1533 RGBColorTableRow(dst, table_argb, width); 1534 dst += dst_stride_argb; 1535 } 1536 return 0; 1537 } 1538 1539 // ARGBQuantize is used to posterize art. 1540 // e.g. rgb / qvalue * qvalue + qvalue / 2 1541 // But the low levels implement efficiently with 3 parameters, and could be 1542 // used for other high level operations. 1543 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; 1544 // where scale is 1 / interval_size as a fixed point value. 1545 // The divide is replaces with a multiply by reciprocal fixed point multiply. 1546 // Caveat - although SSE2 saturates, the C function does not and should be used 1547 // with care if doing anything but quantization. 1548 LIBYUV_API 1549 int ARGBQuantize(uint8* dst_argb, int dst_stride_argb, 1550 int scale, int interval_size, int interval_offset, 1551 int dst_x, int dst_y, int width, int height) { 1552 int y; 1553 void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size, 1554 int interval_offset, int width) = ARGBQuantizeRow_C; 1555 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1556 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 || 1557 interval_size < 1 || interval_size > 255) { 1558 return -1; 1559 } 1560 // Coalesce rows. 1561 if (dst_stride_argb == width * 4) { 1562 width *= height; 1563 height = 1; 1564 dst_stride_argb = 0; 1565 } 1566 #if defined(HAS_ARGBQUANTIZEROW_SSE2) 1567 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) && 1568 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1569 ARGBQuantizeRow = ARGBQuantizeRow_SSE2; 1570 } 1571 #elif defined(HAS_ARGBQUANTIZEROW_NEON) 1572 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1573 ARGBQuantizeRow = ARGBQuantizeRow_NEON; 1574 } 1575 #endif 1576 for (y = 0; y < height; ++y) { 1577 ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width); 1578 dst += dst_stride_argb; 1579 } 1580 return 0; 1581 } 1582 1583 // Computes table of cumulative sum for image where the value is the sum 1584 // of all values above and to the left of the entry. Used by ARGBBlur. 1585 LIBYUV_API 1586 int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb, 1587 int32* dst_cumsum, int dst_stride32_cumsum, 1588 int width, int height) { 1589 int y; 1590 void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum, 1591 const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C; 1592 int32* previous_cumsum = dst_cumsum; 1593 if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) { 1594 return -1; 1595 } 1596 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2) 1597 if (TestCpuFlag(kCpuHasSSE2)) { 1598 ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; 1599 } 1600 #endif 1601 memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel. 1602 for (y = 0; y < height; ++y) { 1603 ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width); 1604 previous_cumsum = dst_cumsum; 1605 dst_cumsum += dst_stride32_cumsum; 1606 src_argb += src_stride_argb; 1607 } 1608 return 0; 1609 } 1610 1611 // Blur ARGB image. 1612 // Caller should allocate CumulativeSum table of width * height * 16 bytes 1613 // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory 1614 // as the buffer is treated as circular. 1615 LIBYUV_API 1616 int ARGBBlur(const uint8* src_argb, int src_stride_argb, 1617 uint8* dst_argb, int dst_stride_argb, 1618 int32* dst_cumsum, int dst_stride32_cumsum, 1619 int width, int height, int radius) { 1620 int y; 1621 void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum, 1622 const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C; 1623 void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft, 1624 int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C; 1625 int32* cumsum_bot_row; 1626 int32* max_cumsum_bot_row; 1627 int32* cumsum_top_row; 1628 1629 if (!src_argb || !dst_argb || width <= 0 || height == 0) { 1630 return -1; 1631 } 1632 if (height < 0) { 1633 height = -height; 1634 src_argb = src_argb + (height - 1) * src_stride_argb; 1635 src_stride_argb = -src_stride_argb; 1636 } 1637 if (radius > height) { 1638 radius = height; 1639 } 1640 if (radius > (width / 2 - 1)) { 1641 radius = width / 2 - 1; 1642 } 1643 if (radius <= 0) { 1644 return -1; 1645 } 1646 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2) 1647 if (TestCpuFlag(kCpuHasSSE2)) { 1648 ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; 1649 CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2; 1650 } 1651 #endif 1652 // Compute enough CumulativeSum for first row to be blurred. After this 1653 // one row of CumulativeSum is updated at a time. 1654 ARGBComputeCumulativeSum(src_argb, src_stride_argb, 1655 dst_cumsum, dst_stride32_cumsum, 1656 width, radius); 1657 1658 src_argb = src_argb + radius * src_stride_argb; 1659 cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum]; 1660 1661 max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum]; 1662 cumsum_top_row = &dst_cumsum[0]; 1663 1664 for (y = 0; y < height; ++y) { 1665 int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0; 1666 int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1); 1667 int area = radius * (bot_y - top_y); 1668 int boxwidth = radius * 4; 1669 int x; 1670 int n; 1671 1672 // Increment cumsum_top_row pointer with circular buffer wrap around. 1673 if (top_y) { 1674 cumsum_top_row += dst_stride32_cumsum; 1675 if (cumsum_top_row >= max_cumsum_bot_row) { 1676 cumsum_top_row = dst_cumsum; 1677 } 1678 } 1679 // Increment cumsum_bot_row pointer with circular buffer wrap around and 1680 // then fill in a row of CumulativeSum. 1681 if ((y + radius) < height) { 1682 const int32* prev_cumsum_bot_row = cumsum_bot_row; 1683 cumsum_bot_row += dst_stride32_cumsum; 1684 if (cumsum_bot_row >= max_cumsum_bot_row) { 1685 cumsum_bot_row = dst_cumsum; 1686 } 1687 ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row, 1688 width); 1689 src_argb += src_stride_argb; 1690 } 1691 1692 // Left clipped. 1693 for (x = 0; x < radius + 1; ++x) { 1694 CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, 1695 boxwidth, area, &dst_argb[x * 4], 1); 1696 area += (bot_y - top_y); 1697 boxwidth += 4; 1698 } 1699 1700 // Middle unclipped. 1701 n = (width - 1) - radius - x + 1; 1702 CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, 1703 boxwidth, area, &dst_argb[x * 4], n); 1704 1705 // Right clipped. 1706 for (x += n; x <= width - 1; ++x) { 1707 area -= (bot_y - top_y); 1708 boxwidth -= 4; 1709 CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4, 1710 cumsum_bot_row + (x - radius - 1) * 4, 1711 boxwidth, area, &dst_argb[x * 4], 1); 1712 } 1713 dst_argb += dst_stride_argb; 1714 } 1715 return 0; 1716 } 1717 1718 // Multiply ARGB image by a specified ARGB value. 1719 LIBYUV_API 1720 int ARGBShade(const uint8* src_argb, int src_stride_argb, 1721 uint8* dst_argb, int dst_stride_argb, 1722 int width, int height, uint32 value) { 1723 int y; 1724 void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb, 1725 int width, uint32 value) = ARGBShadeRow_C; 1726 if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) { 1727 return -1; 1728 } 1729 if (height < 0) { 1730 height = -height; 1731 src_argb = src_argb + (height - 1) * src_stride_argb; 1732 src_stride_argb = -src_stride_argb; 1733 } 1734 // Coalesce rows. 1735 if (src_stride_argb == width * 4 && 1736 dst_stride_argb == width * 4) { 1737 width *= height; 1738 height = 1; 1739 src_stride_argb = dst_stride_argb = 0; 1740 } 1741 #if defined(HAS_ARGBSHADEROW_SSE2) 1742 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) && 1743 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && 1744 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1745 ARGBShadeRow = ARGBShadeRow_SSE2; 1746 } 1747 #elif defined(HAS_ARGBSHADEROW_NEON) 1748 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1749 ARGBShadeRow = ARGBShadeRow_NEON; 1750 } 1751 #endif 1752 1753 for (y = 0; y < height; ++y) { 1754 ARGBShadeRow(src_argb, dst_argb, width, value); 1755 src_argb += src_stride_argb; 1756 dst_argb += dst_stride_argb; 1757 } 1758 return 0; 1759 } 1760 1761 // Interpolate 2 ARGB images by specified amount (0 to 255). 1762 LIBYUV_API 1763 int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, 1764 const uint8* src_argb1, int src_stride_argb1, 1765 uint8* dst_argb, int dst_stride_argb, 1766 int width, int height, int interpolation) { 1767 int y; 1768 void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, 1769 ptrdiff_t src_stride, int dst_width, 1770 int source_y_fraction) = InterpolateRow_C; 1771 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { 1772 return -1; 1773 } 1774 // Negative height means invert the image. 1775 if (height < 0) { 1776 height = -height; 1777 dst_argb = dst_argb + (height - 1) * dst_stride_argb; 1778 dst_stride_argb = -dst_stride_argb; 1779 } 1780 // Coalesce rows. 1781 if (src_stride_argb0 == width * 4 && 1782 src_stride_argb1 == width * 4 && 1783 dst_stride_argb == width * 4) { 1784 width *= height; 1785 height = 1; 1786 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; 1787 } 1788 #if defined(HAS_INTERPOLATEROW_SSE2) 1789 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { 1790 InterpolateRow = InterpolateRow_Any_SSE2; 1791 if (IS_ALIGNED(width, 4)) { 1792 InterpolateRow = InterpolateRow_Unaligned_SSE2; 1793 if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && 1794 IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) && 1795 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1796 InterpolateRow = InterpolateRow_SSE2; 1797 } 1798 } 1799 } 1800 #endif 1801 #if defined(HAS_INTERPOLATEROW_SSSE3) 1802 if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) { 1803 InterpolateRow = InterpolateRow_Any_SSSE3; 1804 if (IS_ALIGNED(width, 4)) { 1805 InterpolateRow = InterpolateRow_Unaligned_SSSE3; 1806 if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && 1807 IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) && 1808 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1809 InterpolateRow = InterpolateRow_SSSE3; 1810 } 1811 } 1812 } 1813 #endif 1814 #if defined(HAS_INTERPOLATEROW_AVX2) 1815 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { 1816 InterpolateRow = InterpolateRow_Any_AVX2; 1817 if (IS_ALIGNED(width, 8)) { 1818 InterpolateRow = InterpolateRow_AVX2; 1819 } 1820 } 1821 #endif 1822 #if defined(HAS_INTERPOLATEROW_NEON) 1823 if (TestCpuFlag(kCpuHasNEON) && width >= 4) { 1824 InterpolateRow = InterpolateRow_Any_NEON; 1825 if (IS_ALIGNED(width, 4)) { 1826 InterpolateRow = InterpolateRow_NEON; 1827 } 1828 } 1829 #endif 1830 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) 1831 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 && 1832 IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) && 1833 IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) && 1834 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { 1835 ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2; 1836 } 1837 #endif 1838 1839 for (y = 0; y < height; ++y) { 1840 InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0, 1841 width * 4, interpolation); 1842 src_argb0 += src_stride_argb0; 1843 src_argb1 += src_stride_argb1; 1844 dst_argb += dst_stride_argb; 1845 } 1846 return 0; 1847 } 1848 1849 // Shuffle ARGB channel order. e.g. BGRA to ARGB. 1850 LIBYUV_API 1851 int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra, 1852 uint8* dst_argb, int dst_stride_argb, 1853 const uint8* shuffler, int width, int height) { 1854 int y; 1855 void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb, 1856 const uint8* shuffler, int pix) = ARGBShuffleRow_C; 1857 if (!src_bgra || !dst_argb || 1858 width <= 0 || height == 0) { 1859 return -1; 1860 } 1861 // Negative height means invert the image. 1862 if (height < 0) { 1863 height = -height; 1864 src_bgra = src_bgra + (height - 1) * src_stride_bgra; 1865 src_stride_bgra = -src_stride_bgra; 1866 } 1867 // Coalesce rows. 1868 if (src_stride_bgra == width * 4 && 1869 dst_stride_argb == width * 4) { 1870 width *= height; 1871 height = 1; 1872 src_stride_bgra = dst_stride_argb = 0; 1873 } 1874 #if defined(HAS_ARGBSHUFFLEROW_SSE2) 1875 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { 1876 ARGBShuffleRow = ARGBShuffleRow_Any_SSE2; 1877 if (IS_ALIGNED(width, 4)) { 1878 ARGBShuffleRow = ARGBShuffleRow_SSE2; 1879 } 1880 } 1881 #endif 1882 #if defined(HAS_ARGBSHUFFLEROW_SSSE3) 1883 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { 1884 ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3; 1885 if (IS_ALIGNED(width, 8)) { 1886 ARGBShuffleRow = ARGBShuffleRow_Unaligned_SSSE3; 1887 if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16) && 1888 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1889 ARGBShuffleRow = ARGBShuffleRow_SSSE3; 1890 } 1891 } 1892 } 1893 #endif 1894 #if defined(HAS_ARGBSHUFFLEROW_AVX2) 1895 if (TestCpuFlag(kCpuHasAVX2) && width >= 16) { 1896 ARGBShuffleRow = ARGBShuffleRow_Any_AVX2; 1897 if (IS_ALIGNED(width, 16)) { 1898 ARGBShuffleRow = ARGBShuffleRow_AVX2; 1899 } 1900 } 1901 #endif 1902 #if defined(HAS_ARGBSHUFFLEROW_NEON) 1903 if (TestCpuFlag(kCpuHasNEON) && width >= 4) { 1904 ARGBShuffleRow = ARGBShuffleRow_Any_NEON; 1905 if (IS_ALIGNED(width, 4)) { 1906 ARGBShuffleRow = ARGBShuffleRow_NEON; 1907 } 1908 } 1909 #endif 1910 1911 for (y = 0; y < height; ++y) { 1912 ARGBShuffleRow(src_bgra, dst_argb, shuffler, width); 1913 src_bgra += src_stride_bgra; 1914 dst_argb += dst_stride_argb; 1915 } 1916 return 0; 1917 } 1918 1919 // Sobel ARGB effect. 1920 static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, 1921 uint8* dst_argb, int dst_stride_argb, 1922 int width, int height, 1923 void (*SobelRow)(const uint8* src_sobelx, 1924 const uint8* src_sobely, 1925 uint8* dst, int width)) { 1926 int y; 1927 void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer, 1928 uint32 selector, int pix) = ARGBToBayerGGRow_C; 1929 void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1, 1930 uint8* dst_sobely, int width) = SobelYRow_C; 1931 void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1, 1932 const uint8* src_y2, uint8* dst_sobely, int width) = 1933 SobelXRow_C; 1934 const int kEdge = 16; // Extra pixels at start of row for extrude/align. 1935 if (!src_argb || !dst_argb || width <= 0 || height == 0) { 1936 return -1; 1937 } 1938 // Negative height means invert the image. 1939 if (height < 0) { 1940 height = -height; 1941 src_argb = src_argb + (height - 1) * src_stride_argb; 1942 src_stride_argb = -src_stride_argb; 1943 } 1944 // ARGBToBayer used to select G channel from ARGB. 1945 #if defined(HAS_ARGBTOBAYERGGROW_SSE2) 1946 if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && 1947 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { 1948 ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2; 1949 if (IS_ALIGNED(width, 8)) { 1950 ARGBToBayerRow = ARGBToBayerGGRow_SSE2; 1951 } 1952 } 1953 #endif 1954 #if defined(HAS_ARGBTOBAYERROW_SSSE3) 1955 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 && 1956 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { 1957 ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3; 1958 if (IS_ALIGNED(width, 8)) { 1959 ARGBToBayerRow = ARGBToBayerRow_SSSE3; 1960 } 1961 } 1962 #endif 1963 #if defined(HAS_ARGBTOBAYERGGROW_NEON) 1964 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 1965 ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON; 1966 if (IS_ALIGNED(width, 8)) { 1967 ARGBToBayerRow = ARGBToBayerGGRow_NEON; 1968 } 1969 } 1970 #endif 1971 #if defined(HAS_SOBELYROW_SSE2) 1972 if (TestCpuFlag(kCpuHasSSE2)) { 1973 SobelYRow = SobelYRow_SSE2; 1974 } 1975 #endif 1976 #if defined(HAS_SOBELYROW_NEON) 1977 if (TestCpuFlag(kCpuHasNEON)) { 1978 SobelYRow = SobelYRow_NEON; 1979 } 1980 #endif 1981 #if defined(HAS_SOBELXROW_SSE2) 1982 if (TestCpuFlag(kCpuHasSSE2)) { 1983 SobelXRow = SobelXRow_SSE2; 1984 } 1985 #endif 1986 #if defined(HAS_SOBELXROW_NEON) 1987 if (TestCpuFlag(kCpuHasNEON)) { 1988 SobelXRow = SobelXRow_NEON; 1989 } 1990 #endif 1991 { 1992 // 3 rows with edges before/after. 1993 const int kRowSize = (width + kEdge + 15) & ~15; 1994 align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge)); 1995 uint8* row_sobelx = rows; 1996 uint8* row_sobely = rows + kRowSize; 1997 uint8* row_y = rows + kRowSize * 2; 1998 1999 // Convert first row. 2000 uint8* row_y0 = row_y + kEdge; 2001 uint8* row_y1 = row_y0 + kRowSize; 2002 uint8* row_y2 = row_y1 + kRowSize; 2003 ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width); 2004 row_y0[-1] = row_y0[0]; 2005 memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. 2006 ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width); 2007 row_y1[-1] = row_y1[0]; 2008 memset(row_y1 + width, row_y1[width - 1], 16); 2009 memset(row_y2 + width, 0, 16); 2010 2011 for (y = 0; y < height; ++y) { 2012 // Convert next row of ARGB to Y. 2013 if (y < (height - 1)) { 2014 src_argb += src_stride_argb; 2015 } 2016 ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width); 2017 row_y2[-1] = row_y2[0]; 2018 row_y2[width] = row_y2[width - 1]; 2019 2020 SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width); 2021 SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width); 2022 SobelRow(row_sobelx, row_sobely, dst_argb, width); 2023 2024 // Cycle thru circular queue of 3 row_y buffers. 2025 { 2026 uint8* row_yt = row_y0; 2027 row_y0 = row_y1; 2028 row_y1 = row_y2; 2029 row_y2 = row_yt; 2030 } 2031 2032 dst_argb += dst_stride_argb; 2033 } 2034 free_aligned_buffer_64(rows); 2035 } 2036 return 0; 2037 } 2038 2039 // Sobel ARGB effect. 2040 LIBYUV_API 2041 int ARGBSobel(const uint8* src_argb, int src_stride_argb, 2042 uint8* dst_argb, int dst_stride_argb, 2043 int width, int height) { 2044 void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely, 2045 uint8* dst_argb, int width) = SobelRow_C; 2046 #if defined(HAS_SOBELROW_SSE2) 2047 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && 2048 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 2049 SobelRow = SobelRow_SSE2; 2050 } 2051 #endif 2052 #if defined(HAS_SOBELROW_NEON) 2053 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 2054 SobelRow = SobelRow_NEON; 2055 } 2056 #endif 2057 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, 2058 width, height, SobelRow); 2059 } 2060 2061 // Sobel ARGB effect with planar output. 2062 LIBYUV_API 2063 int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb, 2064 uint8* dst_y, int dst_stride_y, 2065 int width, int height) { 2066 void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely, 2067 uint8* dst_, int width) = SobelToPlaneRow_C; 2068 #if defined(HAS_SOBELTOPLANEROW_SSE2) 2069 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && 2070 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { 2071 SobelToPlaneRow = SobelToPlaneRow_SSE2; 2072 } 2073 #endif 2074 #if defined(HAS_SOBELTOPLANEROW_NEON) 2075 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { 2076 SobelToPlaneRow = SobelToPlaneRow_NEON; 2077 } 2078 #endif 2079 return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, 2080 width, height, SobelToPlaneRow); 2081 } 2082 2083 // SobelXY ARGB effect. 2084 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel. 2085 LIBYUV_API 2086 int ARGBSobelXY(const uint8* src_argb, int src_stride_argb, 2087 uint8* dst_argb, int dst_stride_argb, 2088 int width, int height) { 2089 void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely, 2090 uint8* dst_argb, int width) = SobelXYRow_C; 2091 #if defined(HAS_SOBELXYROW_SSE2) 2092 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && 2093 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 2094 SobelXYRow = SobelXYRow_SSE2; 2095 } 2096 #endif 2097 #if defined(HAS_SOBELXYROW_NEON) 2098 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 2099 SobelXYRow = SobelXYRow_NEON; 2100 } 2101 #endif 2102 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, 2103 width, height, SobelXYRow); 2104 } 2105 2106 // Apply a 4x4 polynomial to each ARGB pixel. 2107 LIBYUV_API 2108 int ARGBPolynomial(const uint8* src_argb, int src_stride_argb, 2109 uint8* dst_argb, int dst_stride_argb, 2110 const float* poly, 2111 int width, int height) { 2112 int y; 2113 void (*ARGBPolynomialRow)(const uint8* src_argb, 2114 uint8* dst_argb, const float* poly, 2115 int width) = ARGBPolynomialRow_C; 2116 if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) { 2117 return -1; 2118 } 2119 // Negative height means invert the image. 2120 if (height < 0) { 2121 height = -height; 2122 src_argb = src_argb + (height - 1) * src_stride_argb; 2123 src_stride_argb = -src_stride_argb; 2124 } 2125 // Coalesce rows. 2126 if (src_stride_argb == width * 4 && 2127 dst_stride_argb == width * 4) { 2128 width *= height; 2129 height = 1; 2130 src_stride_argb = dst_stride_argb = 0; 2131 } 2132 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2) 2133 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) { 2134 ARGBPolynomialRow = ARGBPolynomialRow_SSE2; 2135 } 2136 #endif 2137 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2) 2138 if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) && 2139 IS_ALIGNED(width, 2)) { 2140 ARGBPolynomialRow = ARGBPolynomialRow_AVX2; 2141 } 2142 #endif 2143 2144 for (y = 0; y < height; ++y) { 2145 ARGBPolynomialRow(src_argb, dst_argb, poly, width); 2146 src_argb += src_stride_argb; 2147 dst_argb += dst_stride_argb; 2148 } 2149 return 0; 2150 } 2151 2152 // Apply a lumacolortable to each ARGB pixel. 2153 LIBYUV_API 2154 int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb, 2155 uint8* dst_argb, int dst_stride_argb, 2156 const uint8* luma, 2157 int width, int height) { 2158 int y; 2159 void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb, 2160 int width, const uint8* luma, const uint32 lumacoeff) = 2161 ARGBLumaColorTableRow_C; 2162 if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) { 2163 return -1; 2164 } 2165 // Negative height means invert the image. 2166 if (height < 0) { 2167 height = -height; 2168 src_argb = src_argb + (height - 1) * src_stride_argb; 2169 src_stride_argb = -src_stride_argb; 2170 } 2171 // Coalesce rows. 2172 if (src_stride_argb == width * 4 && 2173 dst_stride_argb == width * 4) { 2174 width *= height; 2175 height = 1; 2176 src_stride_argb = dst_stride_argb = 0; 2177 } 2178 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3) 2179 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) { 2180 ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3; 2181 } 2182 #endif 2183 2184 for (y = 0; y < height; ++y) { 2185 ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f); 2186 src_argb += src_stride_argb; 2187 dst_argb += dst_stride_argb; 2188 } 2189 return 0; 2190 } 2191 2192 // Copy Alpha from one ARGB image to another. 2193 LIBYUV_API 2194 int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, 2195 uint8* dst_argb, int dst_stride_argb, 2196 int width, int height) { 2197 int y; 2198 void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) = 2199 ARGBCopyAlphaRow_C; 2200 if (!src_argb || !dst_argb || width <= 0 || height == 0) { 2201 return -1; 2202 } 2203 // Negative height means invert the image. 2204 if (height < 0) { 2205 height = -height; 2206 src_argb = src_argb + (height - 1) * src_stride_argb; 2207 src_stride_argb = -src_stride_argb; 2208 } 2209 // Coalesce rows. 2210 if (src_stride_argb == width * 4 && 2211 dst_stride_argb == width * 4) { 2212 width *= height; 2213 height = 1; 2214 src_stride_argb = dst_stride_argb = 0; 2215 } 2216 #if defined(HAS_ARGBCOPYALPHAROW_SSE2) 2217 if (TestCpuFlag(kCpuHasSSE2) && 2218 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && 2219 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) && 2220 IS_ALIGNED(width, 8)) { 2221 ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2; 2222 } 2223 #endif 2224 #if defined(HAS_ARGBCOPYALPHAROW_AVX2) 2225 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) { 2226 ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2; 2227 } 2228 #endif 2229 2230 for (y = 0; y < height; ++y) { 2231 ARGBCopyAlphaRow(src_argb, dst_argb, width); 2232 src_argb += src_stride_argb; 2233 dst_argb += dst_stride_argb; 2234 } 2235 return 0; 2236 } 2237 2238 // Copy a planar Y channel to the alpha channel of a destination ARGB image. 2239 LIBYUV_API 2240 int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, 2241 uint8* dst_argb, int dst_stride_argb, 2242 int width, int height) { 2243 int y; 2244 void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) = 2245 ARGBCopyYToAlphaRow_C; 2246 if (!src_y || !dst_argb || width <= 0 || height == 0) { 2247 return -1; 2248 } 2249 // Negative height means invert the image. 2250 if (height < 0) { 2251 height = -height; 2252 src_y = src_y + (height - 1) * src_stride_y; 2253 src_stride_y = -src_stride_y; 2254 } 2255 // Coalesce rows. 2256 if (src_stride_y == width && 2257 dst_stride_argb == width * 4) { 2258 width *= height; 2259 height = 1; 2260 src_stride_y = dst_stride_argb = 0; 2261 } 2262 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2) 2263 if (TestCpuFlag(kCpuHasSSE2) && 2264 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && 2265 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) && 2266 IS_ALIGNED(width, 8)) { 2267 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2; 2268 } 2269 #endif 2270 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2) 2271 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) { 2272 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2; 2273 } 2274 #endif 2275 2276 for (y = 0; y < height; ++y) { 2277 ARGBCopyYToAlphaRow(src_y, dst_argb, width); 2278 src_y += src_stride_y; 2279 dst_argb += dst_stride_argb; 2280 } 2281 return 0; 2282 } 2283 2284 #ifdef __cplusplus 2285 } // extern "C" 2286 } // namespace libyuv 2287 #endif 2288