1 /* 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "libyuv/scale.h" 12 13 #include <assert.h> 14 #include <string.h> 15 16 #include "libyuv/cpu_id.h" 17 #include "libyuv/planar_functions.h" // For CopyARGB 18 #include "libyuv/row.h" 19 #include "libyuv/scale_row.h" 20 21 #ifdef __cplusplus 22 namespace libyuv { 23 extern "C" { 24 #endif 25 26 static __inline int Abs(int v) { 27 return v >= 0 ? v : -v; 28 } 29 30 // ScaleARGB ARGB, 1/2 31 // This is an optimized version for scaling down a ARGB to 1/2 of 32 // its original size. 33 static void ScaleARGBDown2(int src_width, int src_height, 34 int dst_width, int dst_height, 35 int src_stride, int dst_stride, 36 const uint8* src_argb, uint8* dst_argb, 37 int x, int dx, int y, int dy, 38 enum FilterMode filtering) { 39 int j; 40 int row_stride = src_stride * (dy >> 16); 41 void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, 42 uint8* dst_argb, int dst_width) = 43 filtering == kFilterNone ? ScaleARGBRowDown2_C : 44 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C : 45 ScaleARGBRowDown2Box_C); 46 assert(dx == 65536 * 2); // Test scale factor of 2. 47 assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2. 48 // Advance to odd row, even column. 49 if (filtering == kFilterBilinear) { 50 src_argb += (y >> 16) * src_stride + (x >> 16) * 4; 51 } else { 52 src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4; 53 } 54 55 #if defined(HAS_SCALEARGBROWDOWN2_SSE2) 56 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && 57 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) && 58 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 59 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 : 60 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 : 61 ScaleARGBRowDown2Box_SSE2); 62 } 63 #elif defined(HAS_SCALEARGBROWDOWN2_NEON) 64 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) && 65 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) { 66 ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON : 67 ScaleARGBRowDown2_NEON; 68 } 69 #endif 70 71 if (filtering == kFilterLinear) { 72 src_stride = 0; 73 } 74 for (j = 0; j < dst_height; ++j) { 75 ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width); 76 src_argb += row_stride; 77 dst_argb += dst_stride; 78 } 79 } 80 81 // ScaleARGB ARGB, 1/4 82 // This is an optimized version for scaling down a ARGB to 1/4 of 83 // its original size. 84 static void ScaleARGBDown4Box(int src_width, int src_height, 85 int dst_width, int dst_height, 86 int src_stride, int dst_stride, 87 const uint8* src_argb, uint8* dst_argb, 88 int x, int dx, int y, int dy) { 89 int j; 90 // Allocate 2 rows of ARGB. 91 const int kRowSize = (dst_width * 2 * 4 + 15) & ~15; 92 align_buffer_64(row, kRowSize * 2); 93 int row_stride = src_stride * (dy >> 16); 94 void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, 95 uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C; 96 // Advance to odd row, even column. 97 src_argb += (y >> 16) * src_stride + (x >> 16) * 4; 98 assert(dx == 65536 * 4); // Test scale factor of 4. 99 assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4. 100 #if defined(HAS_SCALEARGBROWDOWN2_SSE2) 101 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && 102 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) && 103 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 104 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2; 105 } 106 #elif defined(HAS_SCALEARGBROWDOWN2_NEON) 107 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) && 108 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) { 109 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON; 110 } 111 #endif 112 for (j = 0; j < dst_height; ++j) { 113 ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); 114 ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, 115 row + kRowSize, dst_width * 2); 116 ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width); 117 src_argb += row_stride; 118 dst_argb += dst_stride; 119 } 120 free_aligned_buffer_64(row); 121 } 122 123 // ScaleARGB ARGB Even 124 // This is an optimized version for scaling down a ARGB to even 125 // multiple of its original size. 126 static void ScaleARGBDownEven(int src_width, int src_height, 127 int dst_width, int dst_height, 128 int src_stride, int dst_stride, 129 const uint8* src_argb, uint8* dst_argb, 130 int x, int dx, int y, int dy, 131 enum FilterMode filtering) { 132 int j; 133 int col_step = dx >> 16; 134 int row_stride = (dy >> 16) * src_stride; 135 void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride, 136 int src_step, uint8* dst_argb, int dst_width) = 137 filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C; 138 assert(IS_ALIGNED(src_width, 2)); 139 assert(IS_ALIGNED(src_height, 2)); 140 src_argb += (y >> 16) * src_stride + (x >> 16) * 4; 141 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) 142 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && 143 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 144 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 : 145 ScaleARGBRowDownEven_SSE2; 146 } 147 #elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON) 148 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) && 149 IS_ALIGNED(src_argb, 4)) { 150 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON : 151 ScaleARGBRowDownEven_NEON; 152 } 153 #endif 154 155 if (filtering == kFilterLinear) { 156 src_stride = 0; 157 } 158 for (j = 0; j < dst_height; ++j) { 159 ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width); 160 src_argb += row_stride; 161 dst_argb += dst_stride; 162 } 163 } 164 165 // Scale ARGB down with bilinear interpolation. 166 static void ScaleARGBBilinearDown(int src_width, int src_height, 167 int dst_width, int dst_height, 168 int src_stride, int dst_stride, 169 const uint8* src_argb, uint8* dst_argb, 170 int x, int dx, int y, int dy, 171 enum FilterMode filtering) { 172 int j; 173 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, 174 ptrdiff_t src_stride, int dst_width, int source_y_fraction) = 175 InterpolateRow_C; 176 void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, 177 int dst_width, int x, int dx) = 178 (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C; 179 int64 xlast = x + (int64)(dst_width - 1) * dx; 180 int64 xl = (dx >= 0) ? x : xlast; 181 int64 xr = (dx >= 0) ? xlast : x; 182 int clip_src_width; 183 xl = (xl >> 16) & ~3; // Left edge aligned. 184 xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels. 185 xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel. 186 if (xr > src_width) { 187 xr = src_width; 188 } 189 clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4. 190 src_argb += xl * 4; 191 x -= (int)(xl << 16); 192 #if defined(HAS_INTERPOLATEROW_SSE2) 193 if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) { 194 InterpolateRow = InterpolateRow_Any_SSE2; 195 if (IS_ALIGNED(clip_src_width, 16)) { 196 InterpolateRow = InterpolateRow_Unaligned_SSE2; 197 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) { 198 InterpolateRow = InterpolateRow_SSE2; 199 } 200 } 201 } 202 #endif 203 #if defined(HAS_INTERPOLATEROW_SSSE3) 204 if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) { 205 InterpolateRow = InterpolateRow_Any_SSSE3; 206 if (IS_ALIGNED(clip_src_width, 16)) { 207 InterpolateRow = InterpolateRow_Unaligned_SSSE3; 208 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) { 209 InterpolateRow = InterpolateRow_SSSE3; 210 } 211 } 212 } 213 #endif 214 #if defined(HAS_INTERPOLATEROW_AVX2) 215 if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) { 216 InterpolateRow = InterpolateRow_Any_AVX2; 217 if (IS_ALIGNED(clip_src_width, 32)) { 218 InterpolateRow = InterpolateRow_AVX2; 219 } 220 } 221 #endif 222 #if defined(HAS_INTERPOLATEROW_NEON) 223 if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) { 224 InterpolateRow = InterpolateRow_Any_NEON; 225 if (IS_ALIGNED(clip_src_width, 16)) { 226 InterpolateRow = InterpolateRow_NEON; 227 } 228 } 229 #endif 230 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) 231 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 && 232 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) { 233 InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; 234 if (IS_ALIGNED(clip_src_width, 4)) { 235 InterpolateRow = InterpolateRow_MIPS_DSPR2; 236 } 237 } 238 #endif 239 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) 240 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 241 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; 242 } 243 #endif 244 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. 245 // Allocate a row of ARGB. 246 { 247 align_buffer_64(row, clip_src_width * 4); 248 249 const int max_y = (src_height - 1) << 16; 250 if (y > max_y) { 251 y = max_y; 252 } 253 for (j = 0; j < dst_height; ++j) { 254 int yi = y >> 16; 255 const uint8* src = src_argb + yi * src_stride; 256 if (filtering == kFilterLinear) { 257 ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx); 258 } else { 259 int yf = (y >> 8) & 255; 260 InterpolateRow(row, src, src_stride, clip_src_width, yf); 261 ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx); 262 } 263 dst_argb += dst_stride; 264 y += dy; 265 if (y > max_y) { 266 y = max_y; 267 } 268 } 269 free_aligned_buffer_64(row); 270 } 271 } 272 273 // Scale ARGB up with bilinear interpolation. 274 static void ScaleARGBBilinearUp(int src_width, int src_height, 275 int dst_width, int dst_height, 276 int src_stride, int dst_stride, 277 const uint8* src_argb, uint8* dst_argb, 278 int x, int dx, int y, int dy, 279 enum FilterMode filtering) { 280 int j; 281 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, 282 ptrdiff_t src_stride, int dst_width, int source_y_fraction) = 283 InterpolateRow_C; 284 void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, 285 int dst_width, int x, int dx) = 286 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; 287 const int max_y = (src_height - 1) << 16; 288 #if defined(HAS_INTERPOLATEROW_SSE2) 289 if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) { 290 InterpolateRow = InterpolateRow_Any_SSE2; 291 if (IS_ALIGNED(dst_width, 4)) { 292 InterpolateRow = InterpolateRow_Unaligned_SSE2; 293 if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 294 InterpolateRow = InterpolateRow_SSE2; 295 } 296 } 297 } 298 #endif 299 #if defined(HAS_INTERPOLATEROW_SSSE3) 300 if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) { 301 InterpolateRow = InterpolateRow_Any_SSSE3; 302 if (IS_ALIGNED(dst_width, 4)) { 303 InterpolateRow = InterpolateRow_Unaligned_SSSE3; 304 if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 305 InterpolateRow = InterpolateRow_SSSE3; 306 } 307 } 308 } 309 #endif 310 #if defined(HAS_INTERPOLATEROW_AVX2) 311 if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) { 312 InterpolateRow = InterpolateRow_Any_AVX2; 313 if (IS_ALIGNED(dst_width, 8)) { 314 InterpolateRow = InterpolateRow_AVX2; 315 } 316 } 317 #endif 318 #if defined(HAS_INTERPOLATEROW_NEON) 319 if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) { 320 InterpolateRow = InterpolateRow_Any_NEON; 321 if (IS_ALIGNED(dst_width, 4)) { 322 InterpolateRow = InterpolateRow_NEON; 323 } 324 } 325 #endif 326 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) 327 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 && 328 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { 329 InterpolateRow = InterpolateRow_MIPS_DSPR2; 330 } 331 #endif 332 if (src_width >= 32768) { 333 ScaleARGBFilterCols = filtering ? 334 ScaleARGBFilterCols64_C : ScaleARGBCols64_C; 335 } 336 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) 337 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 338 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; 339 } 340 #endif 341 #if defined(HAS_SCALEARGBCOLS_SSE2) 342 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { 343 ScaleARGBFilterCols = ScaleARGBCols_SSE2; 344 } 345 #endif 346 if (!filtering && src_width * 2 == dst_width && x < 0x8000) { 347 ScaleARGBFilterCols = ScaleARGBColsUp2_C; 348 #if defined(HAS_SCALEARGBCOLSUP2_SSE2) 349 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && 350 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && 351 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 352 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; 353 } 354 #endif 355 } 356 357 if (y > max_y) { 358 y = max_y; 359 } 360 361 { 362 int yi = y >> 16; 363 const uint8* src = src_argb + yi * src_stride; 364 365 // Allocate 2 rows of ARGB. 366 const int kRowSize = (dst_width * 4 + 15) & ~15; 367 align_buffer_64(row, kRowSize * 2); 368 369 uint8* rowptr = row; 370 int rowstride = kRowSize; 371 int lasty = yi; 372 373 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); 374 if (src_height > 1) { 375 src += src_stride; 376 } 377 ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx); 378 src += src_stride; 379 380 for (j = 0; j < dst_height; ++j) { 381 yi = y >> 16; 382 if (yi != lasty) { 383 if (y > max_y) { 384 y = max_y; 385 yi = y >> 16; 386 src = src_argb + yi * src_stride; 387 } 388 if (yi != lasty) { 389 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); 390 rowptr += rowstride; 391 rowstride = -rowstride; 392 lasty = yi; 393 src += src_stride; 394 } 395 } 396 if (filtering == kFilterLinear) { 397 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0); 398 } else { 399 int yf = (y >> 8) & 255; 400 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); 401 } 402 dst_argb += dst_stride; 403 y += dy; 404 } 405 free_aligned_buffer_64(row); 406 } 407 } 408 409 #ifdef YUVSCALEUP 410 // Scale YUV to ARGB up with bilinear interpolation. 411 static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, 412 int dst_width, int dst_height, 413 int src_stride_y, 414 int src_stride_u, 415 int src_stride_v, 416 int dst_stride_argb, 417 const uint8* src_y, 418 const uint8* src_u, 419 const uint8* src_v, 420 uint8* dst_argb, 421 int x, int dx, int y, int dy, 422 enum FilterMode filtering) { 423 int j; 424 void (*I422ToARGBRow)(const uint8* y_buf, 425 const uint8* u_buf, 426 const uint8* v_buf, 427 uint8* rgb_buf, 428 int width) = I422ToARGBRow_C; 429 #if defined(HAS_I422TOARGBROW_SSSE3) 430 if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) { 431 I422ToARGBRow = I422ToARGBRow_Any_SSSE3; 432 if (IS_ALIGNED(src_width, 8)) { 433 I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3; 434 if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 435 I422ToARGBRow = I422ToARGBRow_SSSE3; 436 } 437 } 438 } 439 #endif 440 #if defined(HAS_I422TOARGBROW_AVX2) 441 if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) { 442 I422ToARGBRow = I422ToARGBRow_Any_AVX2; 443 if (IS_ALIGNED(src_width, 16)) { 444 I422ToARGBRow = I422ToARGBRow_AVX2; 445 } 446 } 447 #endif 448 #if defined(HAS_I422TOARGBROW_NEON) 449 if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) { 450 I422ToARGBRow = I422ToARGBRow_Any_NEON; 451 if (IS_ALIGNED(src_width, 8)) { 452 I422ToARGBRow = I422ToARGBRow_NEON; 453 } 454 } 455 #endif 456 #if defined(HAS_I422TOARGBROW_MIPS_DSPR2) 457 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) && 458 IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && 459 IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && 460 IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && 461 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { 462 I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; 463 } 464 #endif 465 466 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, 467 ptrdiff_t src_stride, int dst_width, int source_y_fraction) = 468 InterpolateRow_C; 469 #if defined(HAS_INTERPOLATEROW_SSE2) 470 if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) { 471 InterpolateRow = InterpolateRow_Any_SSE2; 472 if (IS_ALIGNED(dst_width, 4)) { 473 InterpolateRow = InterpolateRow_Unaligned_SSE2; 474 if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 475 InterpolateRow = InterpolateRow_SSE2; 476 } 477 } 478 } 479 #endif 480 #if defined(HAS_INTERPOLATEROW_SSSE3) 481 if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) { 482 InterpolateRow = InterpolateRow_Any_SSSE3; 483 if (IS_ALIGNED(dst_width, 4)) { 484 InterpolateRow = InterpolateRow_Unaligned_SSSE3; 485 if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 486 InterpolateRow = InterpolateRow_SSSE3; 487 } 488 } 489 } 490 #endif 491 #if defined(HAS_INTERPOLATEROW_AVX2) 492 if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) { 493 InterpolateRow = InterpolateRow_Any_AVX2; 494 if (IS_ALIGNED(dst_width, 8)) { 495 InterpolateRow = InterpolateRow_AVX2; 496 } 497 } 498 #endif 499 #if defined(HAS_INTERPOLATEROW_NEON) 500 if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) { 501 InterpolateRow = InterpolateRow_Any_NEON; 502 if (IS_ALIGNED(dst_width, 4)) { 503 InterpolateRow = InterpolateRow_NEON; 504 } 505 } 506 #endif 507 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) 508 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 && 509 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { 510 InterpolateRow = InterpolateRow_MIPS_DSPR2; 511 } 512 #endif 513 514 void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, 515 int dst_width, int x, int dx) = 516 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; 517 if (src_width >= 32768) { 518 ScaleARGBFilterCols = filtering ? 519 ScaleARGBFilterCols64_C : ScaleARGBCols64_C; 520 } 521 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) 522 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 523 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; 524 } 525 #endif 526 #if defined(HAS_SCALEARGBCOLS_SSE2) 527 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { 528 ScaleARGBFilterCols = ScaleARGBCols_SSE2; 529 } 530 #endif 531 if (!filtering && src_width * 2 == dst_width && x < 0x8000) { 532 ScaleARGBFilterCols = ScaleARGBColsUp2_C; 533 #if defined(HAS_SCALEARGBCOLSUP2_SSE2) 534 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && 535 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && 536 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 537 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; 538 } 539 #endif 540 } 541 542 const int max_y = (src_height - 1) << 16; 543 if (y > max_y) { 544 y = max_y; 545 } 546 const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate. 547 int yi = y >> 16; 548 int uv_yi = yi >> kYShift; 549 const uint8* src_row_y = src_y + yi * src_stride_y; 550 const uint8* src_row_u = src_u + uv_yi * src_stride_u; 551 const uint8* src_row_v = src_v + uv_yi * src_stride_v; 552 553 // Allocate 2 rows of ARGB. 554 const int kRowSize = (dst_width * 4 + 15) & ~15; 555 align_buffer_64(row, kRowSize * 2); 556 557 // Allocate 1 row of ARGB for source conversion. 558 align_buffer_64(argb_row, src_width * 4); 559 560 uint8* rowptr = row; 561 int rowstride = kRowSize; 562 int lasty = yi; 563 564 // TODO(fbarchard): Convert first 2 rows of YUV to ARGB. 565 ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx); 566 if (src_height > 1) { 567 src_row_y += src_stride_y; 568 if (yi & 1) { 569 src_row_u += src_stride_u; 570 src_row_v += src_stride_v; 571 } 572 } 573 ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx); 574 if (src_height > 2) { 575 src_row_y += src_stride_y; 576 if (!(yi & 1)) { 577 src_row_u += src_stride_u; 578 src_row_v += src_stride_v; 579 } 580 } 581 582 for (j = 0; j < dst_height; ++j) { 583 yi = y >> 16; 584 if (yi != lasty) { 585 if (y > max_y) { 586 y = max_y; 587 yi = y >> 16; 588 uv_yi = yi >> kYShift; 589 src_row_y = src_y + yi * src_stride_y; 590 src_row_u = src_u + uv_yi * src_stride_u; 591 src_row_v = src_v + uv_yi * src_stride_v; 592 } 593 if (yi != lasty) { 594 // TODO(fbarchard): Convert the clipped region of row. 595 I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width); 596 ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx); 597 rowptr += rowstride; 598 rowstride = -rowstride; 599 lasty = yi; 600 src_row_y += src_stride_y; 601 if (yi & 1) { 602 src_row_u += src_stride_u; 603 src_row_v += src_stride_v; 604 } 605 } 606 } 607 if (filtering == kFilterLinear) { 608 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0); 609 } else { 610 int yf = (y >> 8) & 255; 611 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); 612 } 613 dst_argb += dst_stride_argb; 614 y += dy; 615 } 616 free_aligned_buffer_64(row); 617 free_aligned_buffer_64(row_argb); 618 } 619 #endif 620 621 // Scale ARGB to/from any dimensions, without interpolation. 622 // Fixed point math is used for performance: The upper 16 bits 623 // of x and dx is the integer part of the source position and 624 // the lower 16 bits are the fixed decimal part. 625 626 static void ScaleARGBSimple(int src_width, int src_height, 627 int dst_width, int dst_height, 628 int src_stride, int dst_stride, 629 const uint8* src_argb, uint8* dst_argb, 630 int x, int dx, int y, int dy) { 631 int j; 632 void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb, 633 int dst_width, int x, int dx) = 634 (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C; 635 #if defined(HAS_SCALEARGBCOLS_SSE2) 636 if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { 637 ScaleARGBCols = ScaleARGBCols_SSE2; 638 } 639 #endif 640 if (src_width * 2 == dst_width && x < 0x8000) { 641 ScaleARGBCols = ScaleARGBColsUp2_C; 642 #if defined(HAS_SCALEARGBCOLSUP2_SSE2) 643 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && 644 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && 645 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 646 ScaleARGBCols = ScaleARGBColsUp2_SSE2; 647 } 648 #endif 649 } 650 651 for (j = 0; j < dst_height; ++j) { 652 ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, 653 dst_width, x, dx); 654 dst_argb += dst_stride; 655 y += dy; 656 } 657 } 658 659 // ScaleARGB a ARGB. 660 // This function in turn calls a scaling function 661 // suitable for handling the desired resolutions. 662 static void ScaleARGB(const uint8* src, int src_stride, 663 int src_width, int src_height, 664 uint8* dst, int dst_stride, 665 int dst_width, int dst_height, 666 int clip_x, int clip_y, int clip_width, int clip_height, 667 enum FilterMode filtering) { 668 // Initial source x/y coordinate and step values as 16.16 fixed point. 669 int x = 0; 670 int y = 0; 671 int dx = 0; 672 int dy = 0; 673 // ARGB does not support box filter yet, but allow the user to pass it. 674 // Simplify filtering when possible. 675 filtering = ScaleFilterReduce(src_width, src_height, 676 dst_width, dst_height, 677 filtering); 678 679 // Negative src_height means invert the image. 680 if (src_height < 0) { 681 src_height = -src_height; 682 src = src + (src_height - 1) * src_stride; 683 src_stride = -src_stride; 684 } 685 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, 686 &x, &y, &dx, &dy); 687 src_width = Abs(src_width); 688 if (clip_x) { 689 int64 clipf = (int64)(clip_x) * dx; 690 x += (clipf & 0xffff); 691 src += (clipf >> 16) * 4; 692 dst += clip_x * 4; 693 } 694 if (clip_y) { 695 int64 clipf = (int64)(clip_y) * dy; 696 y += (clipf & 0xffff); 697 src += (clipf >> 16) * src_stride; 698 dst += clip_y * dst_stride; 699 } 700 701 // Special case for integer step values. 702 if (((dx | dy) & 0xffff) == 0) { 703 if (!dx || !dy) { // 1 pixel wide and/or tall. 704 filtering = kFilterNone; 705 } else { 706 // Optimized even scale down. ie 2, 4, 6, 8, 10x. 707 if (!(dx & 0x10000) && !(dy & 0x10000)) { 708 if (dx == 0x20000) { 709 // Optimized 1/2 downsample. 710 ScaleARGBDown2(src_width, src_height, 711 clip_width, clip_height, 712 src_stride, dst_stride, src, dst, 713 x, dx, y, dy, filtering); 714 return; 715 } 716 if (dx == 0x40000 && filtering == kFilterBox) { 717 // Optimized 1/4 box downsample. 718 ScaleARGBDown4Box(src_width, src_height, 719 clip_width, clip_height, 720 src_stride, dst_stride, src, dst, 721 x, dx, y, dy); 722 return; 723 } 724 ScaleARGBDownEven(src_width, src_height, 725 clip_width, clip_height, 726 src_stride, dst_stride, src, dst, 727 x, dx, y, dy, filtering); 728 return; 729 } 730 // Optimized odd scale down. ie 3, 5, 7, 9x. 731 if ((dx & 0x10000) && (dy & 0x10000)) { 732 filtering = kFilterNone; 733 if (dx == 0x10000 && dy == 0x10000) { 734 // Straight copy. 735 ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride, 736 dst, dst_stride, clip_width, clip_height); 737 return; 738 } 739 } 740 } 741 } 742 if (dx == 0x10000 && (x & 0xffff) == 0) { 743 // Arbitrary scale vertically, but unscaled vertically. 744 ScalePlaneVertical(src_height, 745 clip_width, clip_height, 746 src_stride, dst_stride, src, dst, 747 x, y, dy, 4, filtering); 748 return; 749 } 750 if (filtering && dy < 65536) { 751 ScaleARGBBilinearUp(src_width, src_height, 752 clip_width, clip_height, 753 src_stride, dst_stride, src, dst, 754 x, dx, y, dy, filtering); 755 return; 756 } 757 if (filtering) { 758 ScaleARGBBilinearDown(src_width, src_height, 759 clip_width, clip_height, 760 src_stride, dst_stride, src, dst, 761 x, dx, y, dy, filtering); 762 return; 763 } 764 ScaleARGBSimple(src_width, src_height, clip_width, clip_height, 765 src_stride, dst_stride, src, dst, 766 x, dx, y, dy); 767 } 768 769 LIBYUV_API 770 int ARGBScaleClip(const uint8* src_argb, int src_stride_argb, 771 int src_width, int src_height, 772 uint8* dst_argb, int dst_stride_argb, 773 int dst_width, int dst_height, 774 int clip_x, int clip_y, int clip_width, int clip_height, 775 enum FilterMode filtering) { 776 if (!src_argb || src_width == 0 || src_height == 0 || 777 !dst_argb || dst_width <= 0 || dst_height <= 0 || 778 clip_x < 0 || clip_y < 0 || 779 (clip_x + clip_width) > dst_width || 780 (clip_y + clip_height) > dst_height) { 781 return -1; 782 } 783 ScaleARGB(src_argb, src_stride_argb, src_width, src_height, 784 dst_argb, dst_stride_argb, dst_width, dst_height, 785 clip_x, clip_y, clip_width, clip_height, filtering); 786 return 0; 787 } 788 789 // Scale an ARGB image. 790 LIBYUV_API 791 int ARGBScale(const uint8* src_argb, int src_stride_argb, 792 int src_width, int src_height, 793 uint8* dst_argb, int dst_stride_argb, 794 int dst_width, int dst_height, 795 enum FilterMode filtering) { 796 if (!src_argb || src_width == 0 || src_height == 0 || 797 !dst_argb || dst_width <= 0 || dst_height <= 0) { 798 return -1; 799 } 800 ScaleARGB(src_argb, src_stride_argb, src_width, src_height, 801 dst_argb, dst_stride_argb, dst_width, dst_height, 802 0, 0, dst_width, dst_height, filtering); 803 return 0; 804 } 805 806 #ifdef __cplusplus 807 } // extern "C" 808 } // namespace libyuv 809 #endif 810