1 /* 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "libyuv/scale.h" 12 13 #include <assert.h> 14 #include <string.h> 15 16 #include "libyuv/cpu_id.h" 17 #include "libyuv/planar_functions.h" // For CopyPlane 18 #include "libyuv/row.h" 19 #include "libyuv/scale_row.h" 20 21 #ifdef __cplusplus 22 namespace libyuv { 23 extern "C" { 24 #endif 25 26 // Remove this macro if OVERREAD is safe. 27 #define AVOID_OVERREAD 1 28 29 static __inline int Abs(int v) { 30 return v >= 0 ? v : -v; 31 } 32 33 #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s) 34 35 // Scale plane, 1/2 36 // This is an optimized version for scaling down a plane to 1/2 of 37 // its original size. 38 39 static void ScalePlaneDown2(int src_width, int src_height, 40 int dst_width, int dst_height, 41 int src_stride, int dst_stride, 42 const uint8* src_ptr, uint8* dst_ptr, 43 enum FilterMode filtering) { 44 int y; 45 void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, 46 uint8* dst_ptr, int dst_width) = 47 filtering == kFilterNone ? ScaleRowDown2_C : 48 (filtering == kFilterLinear ? ScaleRowDown2Linear_C : 49 ScaleRowDown2Box_C); 50 int row_stride = src_stride << 1; 51 if (!filtering) { 52 src_ptr += src_stride; // Point to odd rows. 53 src_stride = 0; 54 } 55 56 #if defined(HAS_SCALEROWDOWN2_NEON) 57 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) { 58 ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON; 59 } 60 #elif defined(HAS_SCALEROWDOWN2_SSE2) 61 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) { 62 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 : 63 (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 : 64 ScaleRowDown2Box_Unaligned_SSE2); 65 if (IS_ALIGNED(src_ptr, 16) && 66 IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) && 67 IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { 68 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 : 69 (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 : 70 ScaleRowDown2Box_SSE2); 71 } 72 } 73 #elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2) 74 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) && 75 IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && 76 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 77 ScaleRowDown2 = filtering ? 78 ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2; 79 } 80 #endif 81 82 if (filtering == kFilterLinear) { 83 src_stride = 0; 84 } 85 // TODO(fbarchard): Loop through source height to allow odd height. 86 for (y = 0; y < dst_height; ++y) { 87 ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width); 88 src_ptr += row_stride; 89 dst_ptr += dst_stride; 90 } 91 } 92 93 static void ScalePlaneDown2_16(int src_width, int src_height, 94 int dst_width, int dst_height, 95 int src_stride, int dst_stride, 96 const uint16* src_ptr, uint16* dst_ptr, 97 enum FilterMode filtering) { 98 int y; 99 void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride, 100 uint16* dst_ptr, int dst_width) = 101 filtering == kFilterNone ? ScaleRowDown2_16_C : 102 (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C : 103 ScaleRowDown2Box_16_C); 104 int row_stride = src_stride << 1; 105 if (!filtering) { 106 src_ptr += src_stride; // Point to odd rows. 107 src_stride = 0; 108 } 109 110 #if defined(HAS_SCALEROWDOWN2_16_NEON) 111 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) { 112 ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON : 113 ScaleRowDown2_16_NEON; 114 } 115 #elif defined(HAS_SCALEROWDOWN2_16_SSE2) 116 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) { 117 ScaleRowDown2 = filtering == kFilterNone ? 118 ScaleRowDown2_Unaligned_16_SSE2 : 119 (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_16_SSE2 : 120 ScaleRowDown2Box_Unaligned_16_SSE2); 121 if (IS_ALIGNED(src_ptr, 16) && 122 IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) && 123 IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { 124 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 : 125 (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 : 126 ScaleRowDown2Box_16_SSE2); 127 } 128 } 129 #elif defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2) 130 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) && 131 IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && 132 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 133 ScaleRowDown2 = filtering ? 134 ScaleRowDown2Box_16_MIPS_DSPR2 : ScaleRowDown2_16_MIPS_DSPR2; 135 } 136 #endif 137 138 if (filtering == kFilterLinear) { 139 src_stride = 0; 140 } 141 // TODO(fbarchard): Loop through source height to allow odd height. 142 for (y = 0; y < dst_height; ++y) { 143 ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width); 144 src_ptr += row_stride; 145 dst_ptr += dst_stride; 146 } 147 } 148 149 // Scale plane, 1/4 150 // This is an optimized version for scaling down a plane to 1/4 of 151 // its original size. 152 153 static void ScalePlaneDown4(int src_width, int src_height, 154 int dst_width, int dst_height, 155 int src_stride, int dst_stride, 156 const uint8* src_ptr, uint8* dst_ptr, 157 enum FilterMode filtering) { 158 int y; 159 void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride, 160 uint8* dst_ptr, int dst_width) = 161 filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C; 162 int row_stride = src_stride << 2; 163 if (!filtering) { 164 src_ptr += src_stride * 2; // Point to row 2. 165 src_stride = 0; 166 } 167 #if defined(HAS_SCALEROWDOWN4_NEON) 168 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) { 169 ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON; 170 } 171 #elif defined(HAS_SCALEROWDOWN4_SSE2) 172 if (TestCpuFlag(kCpuHasSSE2) && 173 IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) && 174 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { 175 ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2; 176 } 177 #elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2) 178 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) && 179 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && 180 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 181 ScaleRowDown4 = filtering ? 182 ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2; 183 } 184 #endif 185 186 if (filtering == kFilterLinear) { 187 src_stride = 0; 188 } 189 for (y = 0; y < dst_height; ++y) { 190 ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width); 191 src_ptr += row_stride; 192 dst_ptr += dst_stride; 193 } 194 } 195 196 static void ScalePlaneDown4_16(int src_width, int src_height, 197 int dst_width, int dst_height, 198 int src_stride, int dst_stride, 199 const uint16* src_ptr, uint16* dst_ptr, 200 enum FilterMode filtering) { 201 int y; 202 void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride, 203 uint16* dst_ptr, int dst_width) = 204 filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C; 205 int row_stride = src_stride << 2; 206 if (!filtering) { 207 src_ptr += src_stride * 2; // Point to row 2. 208 src_stride = 0; 209 } 210 #if defined(HAS_SCALEROWDOWN4_16_NEON) 211 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) { 212 ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON : 213 ScaleRowDown4_16_NEON; 214 } 215 #elif defined(HAS_SCALEROWDOWN4_16_SSE2) 216 if (TestCpuFlag(kCpuHasSSE2) && 217 IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) && 218 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { 219 ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 : 220 ScaleRowDown4_16_SSE2; 221 } 222 #elif defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2) 223 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) && 224 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && 225 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 226 ScaleRowDown4 = filtering ? 227 ScaleRowDown4Box_16_MIPS_DSPR2 : ScaleRowDown4_16_MIPS_DSPR2; 228 } 229 #endif 230 231 if (filtering == kFilterLinear) { 232 src_stride = 0; 233 } 234 for (y = 0; y < dst_height; ++y) { 235 ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width); 236 src_ptr += row_stride; 237 dst_ptr += dst_stride; 238 } 239 } 240 241 // Scale plane down, 3/4 242 243 static void ScalePlaneDown34(int src_width, int src_height, 244 int dst_width, int dst_height, 245 int src_stride, int dst_stride, 246 const uint8* src_ptr, uint8* dst_ptr, 247 enum FilterMode filtering) { 248 int y; 249 void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride, 250 uint8* dst_ptr, int dst_width); 251 void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride, 252 uint8* dst_ptr, int dst_width); 253 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; 254 assert(dst_width % 3 == 0); 255 if (!filtering) { 256 ScaleRowDown34_0 = ScaleRowDown34_C; 257 ScaleRowDown34_1 = ScaleRowDown34_C; 258 } else { 259 ScaleRowDown34_0 = ScaleRowDown34_0_Box_C; 260 ScaleRowDown34_1 = ScaleRowDown34_1_Box_C; 261 } 262 #if defined(HAS_SCALEROWDOWN34_NEON) 263 if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) { 264 if (!filtering) { 265 ScaleRowDown34_0 = ScaleRowDown34_NEON; 266 ScaleRowDown34_1 = ScaleRowDown34_NEON; 267 } else { 268 ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON; 269 ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON; 270 } 271 } 272 #endif 273 #if defined(HAS_SCALEROWDOWN34_SSSE3) 274 if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && 275 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { 276 if (!filtering) { 277 ScaleRowDown34_0 = ScaleRowDown34_SSSE3; 278 ScaleRowDown34_1 = ScaleRowDown34_SSSE3; 279 } else { 280 ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3; 281 ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3; 282 } 283 } 284 #endif 285 #if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2) 286 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) && 287 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && 288 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 289 if (!filtering) { 290 ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2; 291 ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2; 292 } else { 293 ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2; 294 ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2; 295 } 296 } 297 #endif 298 299 for (y = 0; y < dst_height - 2; y += 3) { 300 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); 301 src_ptr += src_stride; 302 dst_ptr += dst_stride; 303 ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width); 304 src_ptr += src_stride; 305 dst_ptr += dst_stride; 306 ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, 307 dst_ptr, dst_width); 308 src_ptr += src_stride * 2; 309 dst_ptr += dst_stride; 310 } 311 312 // Remainder 1 or 2 rows with last row vertically unfiltered 313 if ((dst_height % 3) == 2) { 314 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); 315 src_ptr += src_stride; 316 dst_ptr += dst_stride; 317 ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width); 318 } else if ((dst_height % 3) == 1) { 319 ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width); 320 } 321 } 322 323 static void ScalePlaneDown34_16(int src_width, int src_height, 324 int dst_width, int dst_height, 325 int src_stride, int dst_stride, 326 const uint16* src_ptr, uint16* dst_ptr, 327 enum FilterMode filtering) { 328 int y; 329 void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride, 330 uint16* dst_ptr, int dst_width); 331 void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride, 332 uint16* dst_ptr, int dst_width); 333 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; 334 assert(dst_width % 3 == 0); 335 if (!filtering) { 336 ScaleRowDown34_0 = ScaleRowDown34_16_C; 337 ScaleRowDown34_1 = ScaleRowDown34_16_C; 338 } else { 339 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C; 340 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C; 341 } 342 #if defined(HAS_SCALEROWDOWN34_16_NEON) 343 if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) { 344 if (!filtering) { 345 ScaleRowDown34_0 = ScaleRowDown34_16_NEON; 346 ScaleRowDown34_1 = ScaleRowDown34_16_NEON; 347 } else { 348 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON; 349 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON; 350 } 351 } 352 #endif 353 #if defined(HAS_SCALEROWDOWN34_16_SSSE3) 354 if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && 355 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { 356 if (!filtering) { 357 ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3; 358 ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3; 359 } else { 360 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3; 361 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3; 362 } 363 } 364 #endif 365 #if defined(HAS_SCALEROWDOWN34_16_MIPS_DSPR2) 366 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) && 367 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && 368 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 369 if (!filtering) { 370 ScaleRowDown34_0 = ScaleRowDown34_16_MIPS_DSPR2; 371 ScaleRowDown34_1 = ScaleRowDown34_16_MIPS_DSPR2; 372 } else { 373 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_MIPS_DSPR2; 374 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_MIPS_DSPR2; 375 } 376 } 377 #endif 378 379 for (y = 0; y < dst_height - 2; y += 3) { 380 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); 381 src_ptr += src_stride; 382 dst_ptr += dst_stride; 383 ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width); 384 src_ptr += src_stride; 385 dst_ptr += dst_stride; 386 ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, 387 dst_ptr, dst_width); 388 src_ptr += src_stride * 2; 389 dst_ptr += dst_stride; 390 } 391 392 // Remainder 1 or 2 rows with last row vertically unfiltered 393 if ((dst_height % 3) == 2) { 394 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); 395 src_ptr += src_stride; 396 dst_ptr += dst_stride; 397 ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width); 398 } else if ((dst_height % 3) == 1) { 399 ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width); 400 } 401 } 402 403 404 // Scale plane, 3/8 405 // This is an optimized version for scaling down a plane to 3/8 406 // of its original size. 407 // 408 // Uses box filter arranges like this 409 // aaabbbcc -> abc 410 // aaabbbcc def 411 // aaabbbcc ghi 412 // dddeeeff 413 // dddeeeff 414 // dddeeeff 415 // ggghhhii 416 // ggghhhii 417 // Boxes are 3x3, 2x3, 3x2 and 2x2 418 419 static void ScalePlaneDown38(int src_width, int src_height, 420 int dst_width, int dst_height, 421 int src_stride, int dst_stride, 422 const uint8* src_ptr, uint8* dst_ptr, 423 enum FilterMode filtering) { 424 int y; 425 void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride, 426 uint8* dst_ptr, int dst_width); 427 void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride, 428 uint8* dst_ptr, int dst_width); 429 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; 430 assert(dst_width % 3 == 0); 431 if (!filtering) { 432 ScaleRowDown38_3 = ScaleRowDown38_C; 433 ScaleRowDown38_2 = ScaleRowDown38_C; 434 } else { 435 ScaleRowDown38_3 = ScaleRowDown38_3_Box_C; 436 ScaleRowDown38_2 = ScaleRowDown38_2_Box_C; 437 } 438 #if defined(HAS_SCALEROWDOWN38_NEON) 439 if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) { 440 if (!filtering) { 441 ScaleRowDown38_3 = ScaleRowDown38_NEON; 442 ScaleRowDown38_2 = ScaleRowDown38_NEON; 443 } else { 444 ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON; 445 ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON; 446 } 447 } 448 #elif defined(HAS_SCALEROWDOWN38_SSSE3) 449 if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && 450 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { 451 if (!filtering) { 452 ScaleRowDown38_3 = ScaleRowDown38_SSSE3; 453 ScaleRowDown38_2 = ScaleRowDown38_SSSE3; 454 } else { 455 ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3; 456 ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3; 457 } 458 } 459 #elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2) 460 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) && 461 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && 462 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 463 if (!filtering) { 464 ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2; 465 ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2; 466 } else { 467 ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2; 468 ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2; 469 } 470 } 471 #endif 472 473 for (y = 0; y < dst_height - 2; y += 3) { 474 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); 475 src_ptr += src_stride * 3; 476 dst_ptr += dst_stride; 477 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); 478 src_ptr += src_stride * 3; 479 dst_ptr += dst_stride; 480 ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width); 481 src_ptr += src_stride * 2; 482 dst_ptr += dst_stride; 483 } 484 485 // Remainder 1 or 2 rows with last row vertically unfiltered 486 if ((dst_height % 3) == 2) { 487 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); 488 src_ptr += src_stride * 3; 489 dst_ptr += dst_stride; 490 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); 491 } else if ((dst_height % 3) == 1) { 492 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); 493 } 494 } 495 496 static void ScalePlaneDown38_16(int src_width, int src_height, 497 int dst_width, int dst_height, 498 int src_stride, int dst_stride, 499 const uint16* src_ptr, uint16* dst_ptr, 500 enum FilterMode filtering) { 501 int y; 502 void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride, 503 uint16* dst_ptr, int dst_width); 504 void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride, 505 uint16* dst_ptr, int dst_width); 506 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; 507 assert(dst_width % 3 == 0); 508 if (!filtering) { 509 ScaleRowDown38_3 = ScaleRowDown38_16_C; 510 ScaleRowDown38_2 = ScaleRowDown38_16_C; 511 } else { 512 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C; 513 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C; 514 } 515 #if defined(HAS_SCALEROWDOWN38_16_NEON) 516 if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) { 517 if (!filtering) { 518 ScaleRowDown38_3 = ScaleRowDown38_16_NEON; 519 ScaleRowDown38_2 = ScaleRowDown38_16_NEON; 520 } else { 521 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON; 522 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON; 523 } 524 } 525 #elif defined(HAS_SCALEROWDOWN38_16_SSSE3) 526 if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && 527 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { 528 if (!filtering) { 529 ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3; 530 ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3; 531 } else { 532 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3; 533 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3; 534 } 535 } 536 #elif defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2) 537 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) && 538 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && 539 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 540 if (!filtering) { 541 ScaleRowDown38_3 = ScaleRowDown38_16_MIPS_DSPR2; 542 ScaleRowDown38_2 = ScaleRowDown38_16_MIPS_DSPR2; 543 } else { 544 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_MIPS_DSPR2; 545 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_MIPS_DSPR2; 546 } 547 } 548 #endif 549 550 for (y = 0; y < dst_height - 2; y += 3) { 551 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); 552 src_ptr += src_stride * 3; 553 dst_ptr += dst_stride; 554 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); 555 src_ptr += src_stride * 3; 556 dst_ptr += dst_stride; 557 ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width); 558 src_ptr += src_stride * 2; 559 dst_ptr += dst_stride; 560 } 561 562 // Remainder 1 or 2 rows with last row vertically unfiltered 563 if ((dst_height % 3) == 2) { 564 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); 565 src_ptr += src_stride * 3; 566 dst_ptr += dst_stride; 567 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); 568 } else if ((dst_height % 3) == 1) { 569 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); 570 } 571 } 572 573 static __inline uint32 SumBox(int iboxwidth, int iboxheight, 574 ptrdiff_t src_stride, const uint8* src_ptr) { 575 uint32 sum = 0u; 576 int y; 577 assert(iboxwidth > 0); 578 assert(iboxheight > 0); 579 for (y = 0; y < iboxheight; ++y) { 580 int x; 581 for (x = 0; x < iboxwidth; ++x) { 582 sum += src_ptr[x]; 583 } 584 src_ptr += src_stride; 585 } 586 return sum; 587 } 588 589 static __inline uint32 SumBox_16(int iboxwidth, int iboxheight, 590 ptrdiff_t src_stride, const uint16* src_ptr) { 591 uint32 sum = 0u; 592 int y; 593 assert(iboxwidth > 0); 594 assert(iboxheight > 0); 595 for (y = 0; y < iboxheight; ++y) { 596 int x; 597 for (x = 0; x < iboxwidth; ++x) { 598 sum += src_ptr[x]; 599 } 600 src_ptr += src_stride; 601 } 602 return sum; 603 } 604 605 static void ScalePlaneBoxRow_C(int dst_width, int boxheight, 606 int x, int dx, ptrdiff_t src_stride, 607 const uint8* src_ptr, uint8* dst_ptr) { 608 int i; 609 int boxwidth; 610 for (i = 0; i < dst_width; ++i) { 611 int ix = x >> 16; 612 x += dx; 613 boxwidth = (x >> 16) - ix; 614 *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) / 615 (boxwidth * boxheight); 616 } 617 } 618 619 static void ScalePlaneBoxRow_16_C(int dst_width, int boxheight, 620 int x, int dx, ptrdiff_t src_stride, 621 const uint16* src_ptr, uint16* dst_ptr) { 622 int i; 623 int boxwidth; 624 for (i = 0; i < dst_width; ++i) { 625 int ix = x >> 16; 626 x += dx; 627 boxwidth = (x >> 16) - ix; 628 *dst_ptr++ = SumBox_16(boxwidth, boxheight, src_stride, src_ptr + ix) / 629 (boxwidth * boxheight); 630 } 631 } 632 633 static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { 634 uint32 sum = 0u; 635 int x; 636 assert(iboxwidth > 0); 637 for (x = 0; x < iboxwidth; ++x) { 638 sum += src_ptr[x]; 639 } 640 return sum; 641 } 642 643 static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) { 644 uint32 sum = 0u; 645 int x; 646 assert(iboxwidth > 0); 647 for (x = 0; x < iboxwidth; ++x) { 648 sum += src_ptr[x]; 649 } 650 return sum; 651 } 652 653 static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, 654 const uint16* src_ptr, uint8* dst_ptr) { 655 int i; 656 int scaletbl[2]; 657 int minboxwidth = (dx >> 16); 658 int* scaleptr = scaletbl - minboxwidth; 659 int boxwidth; 660 scaletbl[0] = 65536 / (minboxwidth * boxheight); 661 scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight); 662 for (i = 0; i < dst_width; ++i) { 663 int ix = x >> 16; 664 x += dx; 665 boxwidth = (x >> 16) - ix; 666 *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16; 667 } 668 } 669 670 static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx, 671 const uint32* src_ptr, uint16* dst_ptr) { 672 int i; 673 int scaletbl[2]; 674 int minboxwidth = (dx >> 16); 675 int* scaleptr = scaletbl - minboxwidth; 676 int boxwidth; 677 scaletbl[0] = 65536 / (minboxwidth * boxheight); 678 scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight); 679 for (i = 0; i < dst_width; ++i) { 680 int ix = x >> 16; 681 x += dx; 682 boxwidth = (x >> 16) - ix; 683 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) * 684 scaleptr[boxwidth] >> 16; 685 } 686 } 687 688 static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, 689 const uint16* src_ptr, uint8* dst_ptr) { 690 int boxwidth = (dx >> 16); 691 int scaleval = 65536 / (boxwidth * boxheight); 692 int i; 693 for (i = 0; i < dst_width; ++i) { 694 *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16; 695 x += boxwidth; 696 } 697 } 698 699 static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx, 700 const uint32* src_ptr, uint16* dst_ptr) { 701 int boxwidth = (dx >> 16); 702 int scaleval = 65536 / (boxwidth * boxheight); 703 int i; 704 for (i = 0; i < dst_width; ++i) { 705 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16; 706 x += boxwidth; 707 } 708 } 709 710 // Scale plane down to any dimensions, with interpolation. 711 // (boxfilter). 712 // 713 // Same method as SimpleScale, which is fixed point, outputting 714 // one pixel of destination using fixed point (16.16) to step 715 // through source, sampling a box of pixel with simple 716 // averaging. 717 static void ScalePlaneBox(int src_width, int src_height, 718 int dst_width, int dst_height, 719 int src_stride, int dst_stride, 720 const uint8* src_ptr, uint8* dst_ptr) { 721 int j; 722 // Initial source x/y coordinate and step values as 16.16 fixed point. 723 int x = 0; 724 int y = 0; 725 int dx = 0; 726 int dy = 0; 727 const int max_y = (src_height << 16); 728 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, 729 &x, &y, &dx, &dy); 730 src_width = Abs(src_width); 731 // TODO(fbarchard): Remove this and make AddRows handle boxheight 1. 732 if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) { 733 uint8* dst = dst_ptr; 734 int j; 735 for (j = 0; j < dst_height; ++j) { 736 int boxheight; 737 int iy = y >> 16; 738 const uint8* src = src_ptr + iy * src_stride; 739 y += dy; 740 if (y > max_y) { 741 y = max_y; 742 } 743 boxheight = (y >> 16) - iy; 744 ScalePlaneBoxRow_C(dst_width, boxheight, 745 x, dx, src_stride, 746 src, dst); 747 dst += dst_stride; 748 } 749 return; 750 } 751 { 752 // Allocate a row buffer of uint16. 753 align_buffer_64(row16, src_width * 2); 754 void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, 755 const uint16* src_ptr, uint8* dst_ptr) = 756 (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C; 757 void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride, 758 uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C; 759 760 #if defined(HAS_SCALEADDROWS_SSE2) 761 if (TestCpuFlag(kCpuHasSSE2) && 762 #ifdef AVOID_OVERREAD 763 IS_ALIGNED(src_width, 16) && 764 #endif 765 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { 766 ScaleAddRows = ScaleAddRows_SSE2; 767 } 768 #endif 769 770 for (j = 0; j < dst_height; ++j) { 771 int boxheight; 772 int iy = y >> 16; 773 const uint8* src = src_ptr + iy * src_stride; 774 y += dy; 775 if (y > (src_height << 16)) { 776 y = (src_height << 16); 777 } 778 boxheight = (y >> 16) - iy; 779 ScaleAddRows(src, src_stride, (uint16*)(row16), 780 src_width, boxheight); 781 ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), 782 dst_ptr); 783 dst_ptr += dst_stride; 784 } 785 free_aligned_buffer_64(row16); 786 } 787 } 788 789 static void ScalePlaneBox_16(int src_width, int src_height, 790 int dst_width, int dst_height, 791 int src_stride, int dst_stride, 792 const uint16* src_ptr, uint16* dst_ptr) { 793 int j; 794 // Initial source x/y coordinate and step values as 16.16 fixed point. 795 int x = 0; 796 int y = 0; 797 int dx = 0; 798 int dy = 0; 799 const int max_y = (src_height << 16); 800 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, 801 &x, &y, &dx, &dy); 802 src_width = Abs(src_width); 803 // TODO(fbarchard): Remove this and make AddRows handle boxheight 1. 804 if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) { 805 uint16* dst = dst_ptr; 806 int j; 807 for (j = 0; j < dst_height; ++j) { 808 int boxheight; 809 int iy = y >> 16; 810 const uint16* src = src_ptr + iy * src_stride; 811 y += dy; 812 if (y > max_y) { 813 y = max_y; 814 } 815 boxheight = (y >> 16) - iy; 816 ScalePlaneBoxRow_16_C(dst_width, boxheight, 817 x, dx, src_stride, 818 src, dst); 819 dst += dst_stride; 820 } 821 return; 822 } 823 { 824 // Allocate a row buffer of uint32. 825 align_buffer_64(row32, src_width * 4); 826 void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, 827 const uint32* src_ptr, uint16* dst_ptr) = 828 (dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C; 829 void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride, 830 uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C; 831 832 #if defined(HAS_SCALEADDROWS_16_SSE2) 833 if (TestCpuFlag(kCpuHasSSE2) && 834 #ifdef AVOID_OVERREAD 835 IS_ALIGNED(src_width, 16) && 836 #endif 837 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { 838 ScaleAddRows = ScaleAddRows_16_SSE2; 839 } 840 #endif 841 842 for (j = 0; j < dst_height; ++j) { 843 int boxheight; 844 int iy = y >> 16; 845 const uint16* src = src_ptr + iy * src_stride; 846 y += dy; 847 if (y > (src_height << 16)) { 848 y = (src_height << 16); 849 } 850 boxheight = (y >> 16) - iy; 851 ScaleAddRows(src, src_stride, (uint32*)(row32), 852 src_width, boxheight); 853 ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), 854 dst_ptr); 855 dst_ptr += dst_stride; 856 } 857 free_aligned_buffer_64(row32); 858 } 859 } 860 861 // Scale plane down with bilinear interpolation. 862 void ScalePlaneBilinearDown(int src_width, int src_height, 863 int dst_width, int dst_height, 864 int src_stride, int dst_stride, 865 const uint8* src_ptr, uint8* dst_ptr, 866 enum FilterMode filtering) { 867 // Initial source x/y coordinate and step values as 16.16 fixed point. 868 int x = 0; 869 int y = 0; 870 int dx = 0; 871 int dy = 0; 872 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. 873 // Allocate a row buffer. 874 align_buffer_64(row, src_width); 875 876 const int max_y = (src_height - 1) << 16; 877 int j; 878 void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr, 879 int dst_width, int x, int dx) = 880 (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C; 881 void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, 882 ptrdiff_t src_stride, int dst_width, int source_y_fraction) = 883 InterpolateRow_C; 884 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, 885 &x, &y, &dx, &dy); 886 src_width = Abs(src_width); 887 888 #if defined(HAS_INTERPOLATEROW_SSE2) 889 if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) { 890 InterpolateRow = InterpolateRow_Any_SSE2; 891 if (IS_ALIGNED(src_width, 16)) { 892 InterpolateRow = InterpolateRow_Unaligned_SSE2; 893 if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { 894 InterpolateRow = InterpolateRow_SSE2; 895 } 896 } 897 } 898 #endif 899 #if defined(HAS_INTERPOLATEROW_SSSE3) 900 if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) { 901 InterpolateRow = InterpolateRow_Any_SSSE3; 902 if (IS_ALIGNED(src_width, 16)) { 903 InterpolateRow = InterpolateRow_Unaligned_SSSE3; 904 if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { 905 InterpolateRow = InterpolateRow_SSSE3; 906 } 907 } 908 } 909 #endif 910 #if defined(HAS_INTERPOLATEROW_AVX2) 911 if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) { 912 InterpolateRow = InterpolateRow_Any_AVX2; 913 if (IS_ALIGNED(src_width, 32)) { 914 InterpolateRow = InterpolateRow_AVX2; 915 } 916 } 917 #endif 918 #if defined(HAS_INTERPOLATEROW_NEON) 919 if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) { 920 InterpolateRow = InterpolateRow_Any_NEON; 921 if (IS_ALIGNED(src_width, 16)) { 922 InterpolateRow = InterpolateRow_NEON; 923 } 924 } 925 #endif 926 #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2) 927 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) { 928 InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; 929 if (IS_ALIGNED(src_width, 4)) { 930 InterpolateRow = InterpolateRow_MIPS_DSPR2; 931 } 932 } 933 #endif 934 935 936 #if defined(HAS_SCALEFILTERCOLS_SSSE3) 937 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 938 ScaleFilterCols = ScaleFilterCols_SSSE3; 939 } 940 #endif 941 if (y > max_y) { 942 y = max_y; 943 } 944 945 for (j = 0; j < dst_height; ++j) { 946 int yi = y >> 16; 947 const uint8* src = src_ptr + yi * src_stride; 948 if (filtering == kFilterLinear) { 949 ScaleFilterCols(dst_ptr, src, dst_width, x, dx); 950 } else { 951 int yf = (y >> 8) & 255; 952 InterpolateRow(row, src, src_stride, src_width, yf); 953 ScaleFilterCols(dst_ptr, row, dst_width, x, dx); 954 } 955 dst_ptr += dst_stride; 956 y += dy; 957 if (y > max_y) { 958 y = max_y; 959 } 960 } 961 free_aligned_buffer_64(row); 962 } 963 964 void ScalePlaneBilinearDown_16(int src_width, int src_height, 965 int dst_width, int dst_height, 966 int src_stride, int dst_stride, 967 const uint16* src_ptr, uint16* dst_ptr, 968 enum FilterMode filtering) { 969 // Initial source x/y coordinate and step values as 16.16 fixed point. 970 int x = 0; 971 int y = 0; 972 int dx = 0; 973 int dy = 0; 974 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. 975 // Allocate a row buffer. 976 align_buffer_64(row, src_width * 2); 977 978 const int max_y = (src_height - 1) << 16; 979 int j; 980 void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr, 981 int dst_width, int x, int dx) = 982 (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C; 983 void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr, 984 ptrdiff_t src_stride, int dst_width, int source_y_fraction) = 985 InterpolateRow_16_C; 986 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, 987 &x, &y, &dx, &dy); 988 src_width = Abs(src_width); 989 990 #if defined(HAS_INTERPOLATEROW_16_SSE2) 991 if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) { 992 InterpolateRow = InterpolateRow_Any_16_SSE2; 993 if (IS_ALIGNED(src_width, 16)) { 994 InterpolateRow = InterpolateRow_Unaligned_16_SSE2; 995 if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { 996 InterpolateRow = InterpolateRow_16_SSE2; 997 } 998 } 999 } 1000 #endif 1001 #if defined(HAS_INTERPOLATEROW_16_SSSE3) 1002 if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) { 1003 InterpolateRow = InterpolateRow_Any_16_SSSE3; 1004 if (IS_ALIGNED(src_width, 16)) { 1005 InterpolateRow = InterpolateRow_Unaligned_16_SSSE3; 1006 if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { 1007 InterpolateRow = InterpolateRow_16_SSSE3; 1008 } 1009 } 1010 } 1011 #endif 1012 #if defined(HAS_INTERPOLATEROW_16_AVX2) 1013 if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) { 1014 InterpolateRow = InterpolateRow_Any_16_AVX2; 1015 if (IS_ALIGNED(src_width, 32)) { 1016 InterpolateRow = InterpolateRow_16_AVX2; 1017 } 1018 } 1019 #endif 1020 #if defined(HAS_INTERPOLATEROW_16_NEON) 1021 if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) { 1022 InterpolateRow = InterpolateRow_Any_16_NEON; 1023 if (IS_ALIGNED(src_width, 16)) { 1024 InterpolateRow = InterpolateRow_16_NEON; 1025 } 1026 } 1027 #endif 1028 #if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2) 1029 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) { 1030 InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2; 1031 if (IS_ALIGNED(src_width, 4)) { 1032 InterpolateRow = InterpolateRow_16_MIPS_DSPR2; 1033 } 1034 } 1035 #endif 1036 1037 1038 #if defined(HAS_SCALEFILTERCOLS_16_SSSE3) 1039 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 1040 ScaleFilterCols = ScaleFilterCols_16_SSSE3; 1041 } 1042 #endif 1043 if (y > max_y) { 1044 y = max_y; 1045 } 1046 1047 for (j = 0; j < dst_height; ++j) { 1048 int yi = y >> 16; 1049 const uint16* src = src_ptr + yi * src_stride; 1050 if (filtering == kFilterLinear) { 1051 ScaleFilterCols(dst_ptr, src, dst_width, x, dx); 1052 } else { 1053 int yf = (y >> 8) & 255; 1054 InterpolateRow((uint16*)row, src, src_stride, src_width, yf); 1055 ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx); 1056 } 1057 dst_ptr += dst_stride; 1058 y += dy; 1059 if (y > max_y) { 1060 y = max_y; 1061 } 1062 } 1063 free_aligned_buffer_64(row); 1064 } 1065 1066 // Scale up down with bilinear interpolation. 1067 void ScalePlaneBilinearUp(int src_width, int src_height, 1068 int dst_width, int dst_height, 1069 int src_stride, int dst_stride, 1070 const uint8* src_ptr, uint8* dst_ptr, 1071 enum FilterMode filtering) { 1072 int j; 1073 // Initial source x/y coordinate and step values as 16.16 fixed point. 1074 int x = 0; 1075 int y = 0; 1076 int dx = 0; 1077 int dy = 0; 1078 const int max_y = (src_height - 1) << 16; 1079 void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, 1080 ptrdiff_t src_stride, int dst_width, int source_y_fraction) = 1081 InterpolateRow_C; 1082 void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr, 1083 int dst_width, int x, int dx) = 1084 filtering ? ScaleFilterCols_C : ScaleCols_C; 1085 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, 1086 &x, &y, &dx, &dy); 1087 src_width = Abs(src_width); 1088 1089 #if defined(HAS_INTERPOLATEROW_SSE2) 1090 if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) { 1091 InterpolateRow = InterpolateRow_Any_SSE2; 1092 if (IS_ALIGNED(dst_width, 16)) { 1093 InterpolateRow = InterpolateRow_Unaligned_SSE2; 1094 if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { 1095 InterpolateRow = InterpolateRow_SSE2; 1096 } 1097 } 1098 } 1099 #endif 1100 #if defined(HAS_INTERPOLATEROW_SSSE3) 1101 if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) { 1102 InterpolateRow = InterpolateRow_Any_SSSE3; 1103 if (IS_ALIGNED(dst_width, 16)) { 1104 InterpolateRow = InterpolateRow_Unaligned_SSSE3; 1105 if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { 1106 InterpolateRow = InterpolateRow_SSSE3; 1107 } 1108 } 1109 } 1110 #endif 1111 #if defined(HAS_INTERPOLATEROW_AVX2) 1112 if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) { 1113 InterpolateRow = InterpolateRow_Any_AVX2; 1114 if (IS_ALIGNED(dst_width, 32)) { 1115 InterpolateRow = InterpolateRow_AVX2; 1116 } 1117 } 1118 #endif 1119 #if defined(HAS_INTERPOLATEROW_NEON) 1120 if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) { 1121 InterpolateRow = InterpolateRow_Any_NEON; 1122 if (IS_ALIGNED(dst_width, 16)) { 1123 InterpolateRow = InterpolateRow_NEON; 1124 } 1125 } 1126 #endif 1127 #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2) 1128 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) { 1129 InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; 1130 if (IS_ALIGNED(dst_width, 4)) { 1131 InterpolateRow = InterpolateRow_MIPS_DSPR2; 1132 } 1133 } 1134 #endif 1135 1136 if (filtering && src_width >= 32768) { 1137 ScaleFilterCols = ScaleFilterCols64_C; 1138 } 1139 #if defined(HAS_SCALEFILTERCOLS_SSSE3) 1140 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 1141 ScaleFilterCols = ScaleFilterCols_SSSE3; 1142 } 1143 #endif 1144 if (!filtering && src_width * 2 == dst_width && x < 0x8000) { 1145 ScaleFilterCols = ScaleColsUp2_C; 1146 #if defined(HAS_SCALECOLS_SSE2) 1147 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && 1148 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && 1149 IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { 1150 ScaleFilterCols = ScaleColsUp2_SSE2; 1151 } 1152 #endif 1153 } 1154 1155 if (y > max_y) { 1156 y = max_y; 1157 } 1158 { 1159 int yi = y >> 16; 1160 const uint8* src = src_ptr + yi * src_stride; 1161 1162 // Allocate 2 row buffers. 1163 const int kRowSize = (dst_width + 15) & ~15; 1164 align_buffer_64(row, kRowSize * 2); 1165 1166 uint8* rowptr = row; 1167 int rowstride = kRowSize; 1168 int lasty = yi; 1169 1170 ScaleFilterCols(rowptr, src, dst_width, x, dx); 1171 if (src_height > 1) { 1172 src += src_stride; 1173 } 1174 ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx); 1175 src += src_stride; 1176 1177 for (j = 0; j < dst_height; ++j) { 1178 yi = y >> 16; 1179 if (yi != lasty) { 1180 if (y > max_y) { 1181 y = max_y; 1182 yi = y >> 16; 1183 src = src_ptr + yi * src_stride; 1184 } 1185 if (yi != lasty) { 1186 ScaleFilterCols(rowptr, src, dst_width, x, dx); 1187 rowptr += rowstride; 1188 rowstride = -rowstride; 1189 lasty = yi; 1190 src += src_stride; 1191 } 1192 } 1193 if (filtering == kFilterLinear) { 1194 InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0); 1195 } else { 1196 int yf = (y >> 8) & 255; 1197 InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf); 1198 } 1199 dst_ptr += dst_stride; 1200 y += dy; 1201 } 1202 free_aligned_buffer_64(row); 1203 } 1204 } 1205 1206 void ScalePlaneBilinearUp_16(int src_width, int src_height, 1207 int dst_width, int dst_height, 1208 int src_stride, int dst_stride, 1209 const uint16* src_ptr, uint16* dst_ptr, 1210 enum FilterMode filtering) { 1211 int j; 1212 // Initial source x/y coordinate and step values as 16.16 fixed point. 1213 int x = 0; 1214 int y = 0; 1215 int dx = 0; 1216 int dy = 0; 1217 const int max_y = (src_height - 1) << 16; 1218 void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr, 1219 ptrdiff_t src_stride, int dst_width, int source_y_fraction) = 1220 InterpolateRow_16_C; 1221 void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr, 1222 int dst_width, int x, int dx) = 1223 filtering ? ScaleFilterCols_16_C : ScaleCols_16_C; 1224 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, 1225 &x, &y, &dx, &dy); 1226 src_width = Abs(src_width); 1227 1228 #if defined(HAS_INTERPOLATEROW_16_SSE2) 1229 if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) { 1230 InterpolateRow = InterpolateRow_Any_16_SSE2; 1231 if (IS_ALIGNED(dst_width, 16)) { 1232 InterpolateRow = InterpolateRow_Unaligned_16_SSE2; 1233 if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { 1234 InterpolateRow = InterpolateRow_16_SSE2; 1235 } 1236 } 1237 } 1238 #endif 1239 #if defined(HAS_INTERPOLATEROW_16_SSSE3) 1240 if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) { 1241 InterpolateRow = InterpolateRow_Any_16_SSSE3; 1242 if (IS_ALIGNED(dst_width, 16)) { 1243 InterpolateRow = InterpolateRow_Unaligned_16_SSSE3; 1244 if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { 1245 InterpolateRow = InterpolateRow_16_SSSE3; 1246 } 1247 } 1248 } 1249 #endif 1250 #if defined(HAS_INTERPOLATEROW_16_AVX2) 1251 if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) { 1252 InterpolateRow = InterpolateRow_Any_16_AVX2; 1253 if (IS_ALIGNED(dst_width, 32)) { 1254 InterpolateRow = InterpolateRow_16_AVX2; 1255 } 1256 } 1257 #endif 1258 #if defined(HAS_INTERPOLATEROW_16_NEON) 1259 if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) { 1260 InterpolateRow = InterpolateRow_Any_16_NEON; 1261 if (IS_ALIGNED(dst_width, 16)) { 1262 InterpolateRow = InterpolateRow_16_NEON; 1263 } 1264 } 1265 #endif 1266 #if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2) 1267 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) { 1268 InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2; 1269 if (IS_ALIGNED(dst_width, 4)) { 1270 InterpolateRow = InterpolateRow_16_MIPS_DSPR2; 1271 } 1272 } 1273 #endif 1274 1275 if (filtering && src_width >= 32768) { 1276 ScaleFilterCols = ScaleFilterCols64_16_C; 1277 } 1278 #if defined(HAS_SCALEFILTERCOLS_16_SSSE3) 1279 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 1280 ScaleFilterCols = ScaleFilterCols_16_SSSE3; 1281 } 1282 #endif 1283 if (!filtering && src_width * 2 == dst_width && x < 0x8000) { 1284 ScaleFilterCols = ScaleColsUp2_16_C; 1285 #if defined(HAS_SCALECOLS_16_SSE2) 1286 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && 1287 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && 1288 IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { 1289 ScaleFilterCols = ScaleColsUp2_16_SSE2; 1290 } 1291 #endif 1292 } 1293 1294 if (y > max_y) { 1295 y = max_y; 1296 } 1297 { 1298 int yi = y >> 16; 1299 const uint16* src = src_ptr + yi * src_stride; 1300 1301 // Allocate 2 row buffers. 1302 const int kRowSize = (dst_width + 15) & ~15; 1303 align_buffer_64(row, kRowSize * 4); 1304 1305 uint16* rowptr = (uint16*)row; 1306 int rowstride = kRowSize; 1307 int lasty = yi; 1308 1309 ScaleFilterCols(rowptr, src, dst_width, x, dx); 1310 if (src_height > 1) { 1311 src += src_stride; 1312 } 1313 ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx); 1314 src += src_stride; 1315 1316 for (j = 0; j < dst_height; ++j) { 1317 yi = y >> 16; 1318 if (yi != lasty) { 1319 if (y > max_y) { 1320 y = max_y; 1321 yi = y >> 16; 1322 src = src_ptr + yi * src_stride; 1323 } 1324 if (yi != lasty) { 1325 ScaleFilterCols(rowptr, src, dst_width, x, dx); 1326 rowptr += rowstride; 1327 rowstride = -rowstride; 1328 lasty = yi; 1329 src += src_stride; 1330 } 1331 } 1332 if (filtering == kFilterLinear) { 1333 InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0); 1334 } else { 1335 int yf = (y >> 8) & 255; 1336 InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf); 1337 } 1338 dst_ptr += dst_stride; 1339 y += dy; 1340 } 1341 free_aligned_buffer_64(row); 1342 } 1343 } 1344 1345 // Scale Plane to/from any dimensions, without interpolation. 1346 // Fixed point math is used for performance: The upper 16 bits 1347 // of x and dx is the integer part of the source position and 1348 // the lower 16 bits are the fixed decimal part. 1349 1350 static void ScalePlaneSimple(int src_width, int src_height, 1351 int dst_width, int dst_height, 1352 int src_stride, int dst_stride, 1353 const uint8* src_ptr, uint8* dst_ptr) { 1354 int i; 1355 void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr, 1356 int dst_width, int x, int dx) = ScaleCols_C; 1357 // Initial source x/y coordinate and step values as 16.16 fixed point. 1358 int x = 0; 1359 int y = 0; 1360 int dx = 0; 1361 int dy = 0; 1362 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, 1363 &x, &y, &dx, &dy); 1364 src_width = Abs(src_width); 1365 1366 if (src_width * 2 == dst_width && x < 0x8000) { 1367 ScaleCols = ScaleColsUp2_C; 1368 #if defined(HAS_SCALECOLS_SSE2) 1369 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && 1370 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && 1371 IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { 1372 ScaleCols = ScaleColsUp2_SSE2; 1373 } 1374 #endif 1375 } 1376 1377 for (i = 0; i < dst_height; ++i) { 1378 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, 1379 dst_width, x, dx); 1380 dst_ptr += dst_stride; 1381 y += dy; 1382 } 1383 } 1384 1385 static void ScalePlaneSimple_16(int src_width, int src_height, 1386 int dst_width, int dst_height, 1387 int src_stride, int dst_stride, 1388 const uint16* src_ptr, uint16* dst_ptr) { 1389 int i; 1390 void (*ScaleCols)(uint16* dst_ptr, const uint16* src_ptr, 1391 int dst_width, int x, int dx) = ScaleCols_16_C; 1392 // Initial source x/y coordinate and step values as 16.16 fixed point. 1393 int x = 0; 1394 int y = 0; 1395 int dx = 0; 1396 int dy = 0; 1397 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, 1398 &x, &y, &dx, &dy); 1399 src_width = Abs(src_width); 1400 1401 if (src_width * 2 == dst_width && x < 0x8000) { 1402 ScaleCols = ScaleColsUp2_16_C; 1403 #if defined(HAS_SCALECOLS_16_SSE2) 1404 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && 1405 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && 1406 IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { 1407 ScaleCols = ScaleColsUp2_16_SSE2; 1408 } 1409 #endif 1410 } 1411 1412 for (i = 0; i < dst_height; ++i) { 1413 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, 1414 dst_width, x, dx); 1415 dst_ptr += dst_stride; 1416 y += dy; 1417 } 1418 } 1419 1420 // Scale a plane. 1421 // This function dispatches to a specialized scaler based on scale factor. 1422 1423 LIBYUV_API 1424 void ScalePlane(const uint8* src, int src_stride, 1425 int src_width, int src_height, 1426 uint8* dst, int dst_stride, 1427 int dst_width, int dst_height, 1428 enum FilterMode filtering) { 1429 // Simplify filtering when possible. 1430 filtering = ScaleFilterReduce(src_width, src_height, 1431 dst_width, dst_height, 1432 filtering); 1433 1434 // Negative height means invert the image. 1435 if (src_height < 0) { 1436 src_height = -src_height; 1437 src = src + (src_height - 1) * src_stride; 1438 src_stride = -src_stride; 1439 } 1440 1441 // Use specialized scales to improve performance for common resolutions. 1442 // For example, all the 1/2 scalings will use ScalePlaneDown2() 1443 if (dst_width == src_width && dst_height == src_height) { 1444 // Straight copy. 1445 CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height); 1446 return; 1447 } 1448 if (dst_width == src_width) { 1449 int dy = FixedDiv(src_height, dst_height); 1450 // Arbitrary scale vertically, but unscaled vertically. 1451 ScalePlaneVertical(src_height, 1452 dst_width, dst_height, 1453 src_stride, dst_stride, src, dst, 1454 0, 0, dy, 1, filtering); 1455 return; 1456 } 1457 if (dst_width <= Abs(src_width) && dst_height <= src_height) { 1458 // Scale down. 1459 if (4 * dst_width == 3 * src_width && 1460 4 * dst_height == 3 * src_height) { 1461 // optimized, 3/4 1462 ScalePlaneDown34(src_width, src_height, dst_width, dst_height, 1463 src_stride, dst_stride, src, dst, filtering); 1464 return; 1465 } 1466 if (2 * dst_width == src_width && 2 * dst_height == src_height) { 1467 // optimized, 1/2 1468 ScalePlaneDown2(src_width, src_height, dst_width, dst_height, 1469 src_stride, dst_stride, src, dst, filtering); 1470 return; 1471 } 1472 // 3/8 rounded up for odd sized chroma height. 1473 if (8 * dst_width == 3 * src_width && 1474 dst_height == ((src_height * 3 + 7) / 8)) { 1475 // optimized, 3/8 1476 ScalePlaneDown38(src_width, src_height, dst_width, dst_height, 1477 src_stride, dst_stride, src, dst, filtering); 1478 return; 1479 } 1480 if (4 * dst_width == src_width && 4 * dst_height == src_height && 1481 filtering != kFilterBilinear) { 1482 // optimized, 1/4 1483 ScalePlaneDown4(src_width, src_height, dst_width, dst_height, 1484 src_stride, dst_stride, src, dst, filtering); 1485 return; 1486 } 1487 } 1488 if (filtering == kFilterBox && dst_height * 2 < src_height) { 1489 ScalePlaneBox(src_width, src_height, dst_width, dst_height, 1490 src_stride, dst_stride, src, dst); 1491 return; 1492 } 1493 if (filtering && dst_height > src_height) { 1494 ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height, 1495 src_stride, dst_stride, src, dst, filtering); 1496 return; 1497 } 1498 if (filtering) { 1499 ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height, 1500 src_stride, dst_stride, src, dst, filtering); 1501 return; 1502 } 1503 ScalePlaneSimple(src_width, src_height, dst_width, dst_height, 1504 src_stride, dst_stride, src, dst); 1505 } 1506 1507 LIBYUV_API 1508 void ScalePlane_16(const uint16* src, int src_stride, 1509 int src_width, int src_height, 1510 uint16* dst, int dst_stride, 1511 int dst_width, int dst_height, 1512 enum FilterMode filtering) { 1513 // Simplify filtering when possible. 1514 filtering = ScaleFilterReduce(src_width, src_height, 1515 dst_width, dst_height, 1516 filtering); 1517 1518 // Negative height means invert the image. 1519 if (src_height < 0) { 1520 src_height = -src_height; 1521 src = src + (src_height - 1) * src_stride; 1522 src_stride = -src_stride; 1523 } 1524 1525 // Use specialized scales to improve performance for common resolutions. 1526 // For example, all the 1/2 scalings will use ScalePlaneDown2() 1527 if (dst_width == src_width && dst_height == src_height) { 1528 // Straight copy. 1529 CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height); 1530 return; 1531 } 1532 if (dst_width == src_width) { 1533 int dy = FixedDiv(src_height, dst_height); 1534 // Arbitrary scale vertically, but unscaled vertically. 1535 ScalePlaneVertical_16(src_height, 1536 dst_width, dst_height, 1537 src_stride, dst_stride, src, dst, 1538 0, 0, dy, 1, filtering); 1539 return; 1540 } 1541 if (dst_width <= Abs(src_width) && dst_height <= src_height) { 1542 // Scale down. 1543 if (4 * dst_width == 3 * src_width && 1544 4 * dst_height == 3 * src_height) { 1545 // optimized, 3/4 1546 ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height, 1547 src_stride, dst_stride, src, dst, filtering); 1548 return; 1549 } 1550 if (2 * dst_width == src_width && 2 * dst_height == src_height) { 1551 // optimized, 1/2 1552 ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height, 1553 src_stride, dst_stride, src, dst, filtering); 1554 return; 1555 } 1556 // 3/8 rounded up for odd sized chroma height. 1557 if (8 * dst_width == 3 * src_width && 1558 dst_height == ((src_height * 3 + 7) / 8)) { 1559 // optimized, 3/8 1560 ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height, 1561 src_stride, dst_stride, src, dst, filtering); 1562 return; 1563 } 1564 if (4 * dst_width == src_width && 4 * dst_height == src_height && 1565 filtering != kFilterBilinear) { 1566 // optimized, 1/4 1567 ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height, 1568 src_stride, dst_stride, src, dst, filtering); 1569 return; 1570 } 1571 } 1572 if (filtering == kFilterBox && dst_height * 2 < src_height) { 1573 ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, 1574 src_stride, dst_stride, src, dst); 1575 return; 1576 } 1577 if (filtering && dst_height > src_height) { 1578 ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height, 1579 src_stride, dst_stride, src, dst, filtering); 1580 return; 1581 } 1582 if (filtering) { 1583 ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height, 1584 src_stride, dst_stride, src, dst, filtering); 1585 return; 1586 } 1587 ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, 1588 src_stride, dst_stride, src, dst); 1589 } 1590 1591 // Scale an I420 image. 1592 // This function in turn calls a scaling function for each plane. 1593 1594 LIBYUV_API 1595 int I420Scale(const uint8* src_y, int src_stride_y, 1596 const uint8* src_u, int src_stride_u, 1597 const uint8* src_v, int src_stride_v, 1598 int src_width, int src_height, 1599 uint8* dst_y, int dst_stride_y, 1600 uint8* dst_u, int dst_stride_u, 1601 uint8* dst_v, int dst_stride_v, 1602 int dst_width, int dst_height, 1603 enum FilterMode filtering) { 1604 int src_halfwidth = SUBSAMPLE(src_width, 1, 1); 1605 int src_halfheight = SUBSAMPLE(src_height, 1, 1); 1606 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); 1607 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); 1608 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || 1609 !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { 1610 return -1; 1611 } 1612 1613 ScalePlane(src_y, src_stride_y, src_width, src_height, 1614 dst_y, dst_stride_y, dst_width, dst_height, 1615 filtering); 1616 ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, 1617 dst_u, dst_stride_u, dst_halfwidth, dst_halfheight, 1618 filtering); 1619 ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, 1620 dst_v, dst_stride_v, dst_halfwidth, dst_halfheight, 1621 filtering); 1622 return 0; 1623 } 1624 1625 LIBYUV_API 1626 int I420Scale_16(const uint16* src_y, int src_stride_y, 1627 const uint16* src_u, int src_stride_u, 1628 const uint16* src_v, int src_stride_v, 1629 int src_width, int src_height, 1630 uint16* dst_y, int dst_stride_y, 1631 uint16* dst_u, int dst_stride_u, 1632 uint16* dst_v, int dst_stride_v, 1633 int dst_width, int dst_height, 1634 enum FilterMode filtering) { 1635 int src_halfwidth = SUBSAMPLE(src_width, 1, 1); 1636 int src_halfheight = SUBSAMPLE(src_height, 1, 1); 1637 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); 1638 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); 1639 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || 1640 !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { 1641 return -1; 1642 } 1643 1644 ScalePlane_16(src_y, src_stride_y, src_width, src_height, 1645 dst_y, dst_stride_y, dst_width, dst_height, 1646 filtering); 1647 ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, 1648 dst_u, dst_stride_u, dst_halfwidth, dst_halfheight, 1649 filtering); 1650 ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, 1651 dst_v, dst_stride_v, dst_halfwidth, dst_halfheight, 1652 filtering); 1653 return 0; 1654 } 1655 1656 // Deprecated api 1657 LIBYUV_API 1658 int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v, 1659 int src_stride_y, int src_stride_u, int src_stride_v, 1660 int src_width, int src_height, 1661 uint8* dst_y, uint8* dst_u, uint8* dst_v, 1662 int dst_stride_y, int dst_stride_u, int dst_stride_v, 1663 int dst_width, int dst_height, 1664 LIBYUV_BOOL interpolate) { 1665 return I420Scale(src_y, src_stride_y, 1666 src_u, src_stride_u, 1667 src_v, src_stride_v, 1668 src_width, src_height, 1669 dst_y, dst_stride_y, 1670 dst_u, dst_stride_u, 1671 dst_v, dst_stride_v, 1672 dst_width, dst_height, 1673 interpolate ? kFilterBox : kFilterNone); 1674 } 1675 1676 // Deprecated api 1677 LIBYUV_API 1678 int ScaleOffset(const uint8* src, int src_width, int src_height, 1679 uint8* dst, int dst_width, int dst_height, int dst_yoffset, 1680 LIBYUV_BOOL interpolate) { 1681 // Chroma requires offset to multiple of 2. 1682 int dst_yoffset_even = dst_yoffset & ~1; 1683 int src_halfwidth = SUBSAMPLE(src_width, 1, 1); 1684 int src_halfheight = SUBSAMPLE(src_height, 1, 1); 1685 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); 1686 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); 1687 int aheight = dst_height - dst_yoffset_even * 2; // actual output height 1688 const uint8* src_y = src; 1689 const uint8* src_u = src + src_width * src_height; 1690 const uint8* src_v = src + src_width * src_height + 1691 src_halfwidth * src_halfheight; 1692 uint8* dst_y = dst + dst_yoffset_even * dst_width; 1693 uint8* dst_u = dst + dst_width * dst_height + 1694 (dst_yoffset_even >> 1) * dst_halfwidth; 1695 uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight + 1696 (dst_yoffset_even >> 1) * dst_halfwidth; 1697 if (!src || src_width <= 0 || src_height <= 0 || 1698 !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset_even < 0 || 1699 dst_yoffset_even >= dst_height) { 1700 return -1; 1701 } 1702 return I420Scale(src_y, src_width, 1703 src_u, src_halfwidth, 1704 src_v, src_halfwidth, 1705 src_width, src_height, 1706 dst_y, dst_width, 1707 dst_u, dst_halfwidth, 1708 dst_v, dst_halfwidth, 1709 dst_width, aheight, 1710 interpolate ? kFilterBox : kFilterNone); 1711 } 1712 1713 #ifdef __cplusplus 1714 } // extern "C" 1715 } // namespace libyuv 1716 #endif 1717