1 /* 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <stdlib.h> 12 #include <time.h> 13 14 #include "../unit_test/unit_test.h" 15 #include "libyuv/convert_argb.h" 16 #include "libyuv/cpu_id.h" 17 #include "libyuv/scale_argb.h" 18 #include "libyuv/video_common.h" 19 20 namespace libyuv { 21 22 #define STRINGIZE(line) #line 23 #define FILELINESTR(file, line) file ":" STRINGIZE(line) 24 25 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. 26 static int ARGBTestFilter(int src_width, 27 int src_height, 28 int dst_width, 29 int dst_height, 30 FilterMode f, 31 int benchmark_iterations, 32 int disable_cpu_flags, 33 int benchmark_cpu_info) { 34 if (!SizeValid(src_width, src_height, dst_width, dst_height)) { 35 return 0; 36 } 37 38 int i, j; 39 const int b = 0; // 128 to test for padding/stride. 40 int64 src_argb_plane_size = 41 (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4LL; 42 int src_stride_argb = (b * 2 + Abs(src_width)) * 4; 43 44 align_buffer_page_end(src_argb, src_argb_plane_size); 45 if (!src_argb) { 46 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); 47 return 0; 48 } 49 MemRandomize(src_argb, src_argb_plane_size); 50 51 int64 dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4LL; 52 int dst_stride_argb = (b * 2 + dst_width) * 4; 53 54 align_buffer_page_end(dst_argb_c, dst_argb_plane_size); 55 align_buffer_page_end(dst_argb_opt, dst_argb_plane_size); 56 if (!dst_argb_c || !dst_argb_opt) { 57 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); 58 return 0; 59 } 60 memset(dst_argb_c, 2, dst_argb_plane_size); 61 memset(dst_argb_opt, 3, dst_argb_plane_size); 62 63 // Warm up both versions for consistent benchmarks. 64 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. 65 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, 66 src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4, 67 dst_stride_argb, dst_width, dst_height, f); 68 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. 69 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, 70 src_width, src_height, dst_argb_opt + (dst_stride_argb * b) + b * 4, 71 dst_stride_argb, dst_width, dst_height, f); 72 73 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. 74 double c_time = get_time(); 75 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, 76 src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4, 77 dst_stride_argb, dst_width, dst_height, f); 78 79 c_time = (get_time() - c_time); 80 81 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. 82 double opt_time = get_time(); 83 for (i = 0; i < benchmark_iterations; ++i) { 84 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, 85 src_width, src_height, 86 dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, 87 dst_width, dst_height, f); 88 } 89 opt_time = (get_time() - opt_time) / benchmark_iterations; 90 91 // Report performance of C vs OPT 92 printf("filter %d - %8d us C - %8d us OPT\n", f, 93 static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); 94 95 // C version may be a little off from the optimized. Order of 96 // operations may introduce rounding somewhere. So do a difference 97 // of the buffers and look to see that the max difference isn't 98 // over 2. 99 int max_diff = 0; 100 for (i = b; i < (dst_height + b); ++i) { 101 for (j = b * 4; j < (dst_width + b) * 4; ++j) { 102 int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] - 103 dst_argb_opt[(i * dst_stride_argb) + j]); 104 if (abs_diff > max_diff) { 105 max_diff = abs_diff; 106 } 107 } 108 } 109 110 free_aligned_buffer_page_end(dst_argb_c); 111 free_aligned_buffer_page_end(dst_argb_opt); 112 free_aligned_buffer_page_end(src_argb); 113 return max_diff; 114 } 115 116 static const int kTileX = 8; 117 static const int kTileY = 8; 118 119 static int TileARGBScale(const uint8* src_argb, 120 int src_stride_argb, 121 int src_width, 122 int src_height, 123 uint8* dst_argb, 124 int dst_stride_argb, 125 int dst_width, 126 int dst_height, 127 FilterMode filtering) { 128 for (int y = 0; y < dst_height; y += kTileY) { 129 for (int x = 0; x < dst_width; x += kTileX) { 130 int clip_width = kTileX; 131 if (x + clip_width > dst_width) { 132 clip_width = dst_width - x; 133 } 134 int clip_height = kTileY; 135 if (y + clip_height > dst_height) { 136 clip_height = dst_height - y; 137 } 138 int r = ARGBScaleClip(src_argb, src_stride_argb, src_width, src_height, 139 dst_argb, dst_stride_argb, dst_width, dst_height, x, 140 y, clip_width, clip_height, filtering); 141 if (r) { 142 return r; 143 } 144 } 145 } 146 return 0; 147 } 148 149 static int ARGBClipTestFilter(int src_width, 150 int src_height, 151 int dst_width, 152 int dst_height, 153 FilterMode f, 154 int benchmark_iterations) { 155 if (!SizeValid(src_width, src_height, dst_width, dst_height)) { 156 return 0; 157 } 158 159 const int b = 128; 160 int64 src_argb_plane_size = 161 (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4; 162 int src_stride_argb = (b * 2 + Abs(src_width)) * 4; 163 164 align_buffer_page_end(src_argb, src_argb_plane_size); 165 if (!src_argb) { 166 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); 167 return 0; 168 } 169 memset(src_argb, 1, src_argb_plane_size); 170 171 int64 dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4; 172 int dst_stride_argb = (b * 2 + dst_width) * 4; 173 174 int i, j; 175 for (i = b; i < (Abs(src_height) + b); ++i) { 176 for (j = b; j < (Abs(src_width) + b) * 4; ++j) { 177 src_argb[(i * src_stride_argb) + j] = (fastrand() & 0xff); 178 } 179 } 180 181 align_buffer_page_end(dst_argb_c, dst_argb_plane_size); 182 align_buffer_page_end(dst_argb_opt, dst_argb_plane_size); 183 if (!dst_argb_c || !dst_argb_opt) { 184 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); 185 return 0; 186 } 187 memset(dst_argb_c, 2, dst_argb_plane_size); 188 memset(dst_argb_opt, 3, dst_argb_plane_size); 189 190 // Do full image, no clipping. 191 double c_time = get_time(); 192 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, 193 src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4, 194 dst_stride_argb, dst_width, dst_height, f); 195 c_time = (get_time() - c_time); 196 197 // Do tiled image, clipping scale to a tile at a time. 198 double opt_time = get_time(); 199 for (i = 0; i < benchmark_iterations; ++i) { 200 TileARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, 201 src_width, src_height, 202 dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, 203 dst_width, dst_height, f); 204 } 205 opt_time = (get_time() - opt_time) / benchmark_iterations; 206 207 // Report performance of Full vs Tiled. 208 printf("filter %d - %8d us Full - %8d us Tiled\n", f, 209 static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); 210 211 // Compare full scaled image vs tiled image. 212 int max_diff = 0; 213 for (i = b; i < (dst_height + b); ++i) { 214 for (j = b * 4; j < (dst_width + b) * 4; ++j) { 215 int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] - 216 dst_argb_opt[(i * dst_stride_argb) + j]); 217 if (abs_diff > max_diff) { 218 max_diff = abs_diff; 219 } 220 } 221 } 222 223 free_aligned_buffer_page_end(dst_argb_c); 224 free_aligned_buffer_page_end(dst_argb_opt); 225 free_aligned_buffer_page_end(src_argb); 226 return max_diff; 227 } 228 229 // The following adjustments in dimensions ensure the scale factor will be 230 // exactly achieved. 231 #define DX(x, nom, denom) static_cast<int>((Abs(x) / nom) * nom) 232 #define SX(x, nom, denom) static_cast<int>((x / nom) * denom) 233 234 #define TEST_FACTOR1(name, filter, nom, denom, max_diff) \ 235 TEST_F(LibYUVScaleTest, ARGBScaleDownBy##name##_##filter) { \ 236 int diff = ARGBTestFilter( \ 237 SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ 238 DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ 239 kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ 240 benchmark_cpu_info_); \ 241 EXPECT_LE(diff, max_diff); \ 242 } \ 243 TEST_F(LibYUVScaleTest, ARGBScaleDownClipBy##name##_##filter) { \ 244 int diff = ARGBClipTestFilter( \ 245 SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ 246 DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ 247 kFilter##filter, benchmark_iterations_); \ 248 EXPECT_LE(diff, max_diff); \ 249 } 250 251 // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but 252 // filtering is different fixed point implementations for SSSE3, Neon and C. 253 #define TEST_FACTOR(name, nom, denom) \ 254 TEST_FACTOR1(name, None, nom, denom, 0) \ 255 TEST_FACTOR1(name, Linear, nom, denom, 3) \ 256 TEST_FACTOR1(name, Bilinear, nom, denom, 3) \ 257 TEST_FACTOR1(name, Box, nom, denom, 3) 258 259 TEST_FACTOR(2, 1, 2) 260 TEST_FACTOR(4, 1, 4) 261 TEST_FACTOR(8, 1, 8) 262 TEST_FACTOR(3by4, 3, 4) 263 TEST_FACTOR(3by8, 3, 8) 264 TEST_FACTOR(3, 1, 3) 265 #undef TEST_FACTOR1 266 #undef TEST_FACTOR 267 #undef SX 268 #undef DX 269 270 #define TEST_SCALETO1(name, width, height, filter, max_diff) \ 271 TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \ 272 int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, width, \ 273 height, kFilter##filter, benchmark_iterations_, \ 274 disable_cpu_flags_, benchmark_cpu_info_); \ 275 EXPECT_LE(diff, max_diff); \ 276 } \ 277 TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \ 278 int diff = ARGBTestFilter(width, height, Abs(benchmark_width_), \ 279 Abs(benchmark_height_), kFilter##filter, \ 280 benchmark_iterations_, disable_cpu_flags_, \ 281 benchmark_cpu_info_); \ 282 EXPECT_LE(diff, max_diff); \ 283 } \ 284 TEST_F(LibYUVScaleTest, name##ClipTo##width##x##height##_##filter) { \ 285 int diff = \ 286 ARGBClipTestFilter(benchmark_width_, benchmark_height_, width, height, \ 287 kFilter##filter, benchmark_iterations_); \ 288 EXPECT_LE(diff, max_diff); \ 289 } \ 290 TEST_F(LibYUVScaleTest, name##ClipFrom##width##x##height##_##filter) { \ 291 int diff = ARGBClipTestFilter(width, height, Abs(benchmark_width_), \ 292 Abs(benchmark_height_), kFilter##filter, \ 293 benchmark_iterations_); \ 294 EXPECT_LE(diff, max_diff); \ 295 } 296 297 /// Test scale to a specified size with all 4 filters. 298 #define TEST_SCALETO(name, width, height) \ 299 TEST_SCALETO1(name, width, height, None, 0) \ 300 TEST_SCALETO1(name, width, height, Linear, 3) \ 301 TEST_SCALETO1(name, width, height, Bilinear, 3) 302 303 TEST_SCALETO(ARGBScale, 1, 1) 304 TEST_SCALETO(ARGBScale, 320, 240) 305 TEST_SCALETO(ARGBScale, 352, 288) 306 TEST_SCALETO(ARGBScale, 569, 480) 307 TEST_SCALETO(ARGBScale, 640, 360) 308 TEST_SCALETO(ARGBScale, 1280, 720) 309 #undef TEST_SCALETO1 310 #undef TEST_SCALETO 311 312 // Scale with YUV conversion to ARGB and clipping. 313 LIBYUV_API 314 int YUVToARGBScaleReference2(const uint8* src_y, 315 int src_stride_y, 316 const uint8* src_u, 317 int src_stride_u, 318 const uint8* src_v, 319 int src_stride_v, 320 uint32 /* src_fourcc */, // TODO: Add support. 321 int src_width, 322 int src_height, 323 uint8* dst_argb, 324 int dst_stride_argb, 325 uint32 /* dst_fourcc */, // TODO: Add support. 326 int dst_width, 327 int dst_height, 328 int clip_x, 329 int clip_y, 330 int clip_width, 331 int clip_height, 332 enum FilterMode filtering) { 333 uint8* argb_buffer = static_cast<uint8*>(malloc(src_width * src_height * 4)); 334 int r; 335 I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, 336 argb_buffer, src_width * 4, src_width, src_height); 337 338 r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb, 339 dst_stride_argb, dst_width, dst_height, clip_x, clip_y, 340 clip_width, clip_height, filtering); 341 free(argb_buffer); 342 return r; 343 } 344 345 static void FillRamp(uint8* buf, int width, int height, int v, int dx, int dy) { 346 int rv = v; 347 for (int y = 0; y < height; ++y) { 348 for (int x = 0; x < width; ++x) { 349 *buf++ = v; 350 v += dx; 351 if (v < 0 || v > 255) { 352 dx = -dx; 353 v += dx; 354 } 355 } 356 v = rv + dy; 357 if (v < 0 || v > 255) { 358 dy = -dy; 359 v += dy; 360 } 361 rv = v; 362 } 363 } 364 365 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. 366 static int YUVToARGBTestFilter(int src_width, 367 int src_height, 368 int dst_width, 369 int dst_height, 370 FilterMode f, 371 int benchmark_iterations) { 372 int64 src_y_plane_size = Abs(src_width) * Abs(src_height); 373 int64 src_uv_plane_size = 374 ((Abs(src_width) + 1) / 2) * ((Abs(src_height) + 1) / 2); 375 int src_stride_y = Abs(src_width); 376 int src_stride_uv = (Abs(src_width) + 1) / 2; 377 378 align_buffer_page_end(src_y, src_y_plane_size); 379 align_buffer_page_end(src_u, src_uv_plane_size); 380 align_buffer_page_end(src_v, src_uv_plane_size); 381 382 int64 dst_argb_plane_size = (dst_width) * (dst_height)*4LL; 383 int dst_stride_argb = (dst_width)*4; 384 align_buffer_page_end(dst_argb_c, dst_argb_plane_size); 385 align_buffer_page_end(dst_argb_opt, dst_argb_plane_size); 386 if (!dst_argb_c || !dst_argb_opt || !src_y || !src_u || !src_v) { 387 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); 388 return 0; 389 } 390 // Fill YUV image with continuous ramp, which is less sensitive to 391 // subsampling and filtering differences for test purposes. 392 FillRamp(src_y, Abs(src_width), Abs(src_height), 128, 1, 1); 393 FillRamp(src_u, (Abs(src_width) + 1) / 2, (Abs(src_height) + 1) / 2, 3, 1, 1); 394 FillRamp(src_v, (Abs(src_width) + 1) / 2, (Abs(src_height) + 1) / 2, 4, 1, 1); 395 memset(dst_argb_c, 2, dst_argb_plane_size); 396 memset(dst_argb_opt, 3, dst_argb_plane_size); 397 398 YUVToARGBScaleReference2(src_y, src_stride_y, src_u, src_stride_uv, src_v, 399 src_stride_uv, libyuv::FOURCC_I420, src_width, 400 src_height, dst_argb_c, dst_stride_argb, 401 libyuv::FOURCC_I420, dst_width, dst_height, 0, 0, 402 dst_width, dst_height, f); 403 404 for (int i = 0; i < benchmark_iterations; ++i) { 405 YUVToARGBScaleClip(src_y, src_stride_y, src_u, src_stride_uv, src_v, 406 src_stride_uv, libyuv::FOURCC_I420, src_width, 407 src_height, dst_argb_opt, dst_stride_argb, 408 libyuv::FOURCC_I420, dst_width, dst_height, 0, 0, 409 dst_width, dst_height, f); 410 } 411 int max_diff = 0; 412 for (int i = 0; i < dst_height; ++i) { 413 for (int j = 0; j < dst_width * 4; ++j) { 414 int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] - 415 dst_argb_opt[(i * dst_stride_argb) + j]); 416 if (abs_diff > max_diff) { 417 printf("error %d at %d,%d c %d opt %d", abs_diff, j, i, 418 dst_argb_c[(i * dst_stride_argb) + j], 419 dst_argb_opt[(i * dst_stride_argb) + j]); 420 EXPECT_LE(abs_diff, 40); 421 max_diff = abs_diff; 422 } 423 } 424 } 425 426 free_aligned_buffer_page_end(dst_argb_c); 427 free_aligned_buffer_page_end(dst_argb_opt); 428 free_aligned_buffer_page_end(src_y); 429 free_aligned_buffer_page_end(src_u); 430 free_aligned_buffer_page_end(src_v); 431 return max_diff; 432 } 433 434 TEST_F(LibYUVScaleTest, YUVToRGBScaleUp) { 435 int diff = 436 YUVToARGBTestFilter(benchmark_width_, benchmark_height_, 437 benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2, 438 libyuv::kFilterBilinear, benchmark_iterations_); 439 EXPECT_LE(diff, 10); 440 } 441 442 TEST_F(LibYUVScaleTest, YUVToRGBScaleDown) { 443 int diff = YUVToARGBTestFilter( 444 benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2, benchmark_width_, 445 benchmark_height_, libyuv::kFilterBilinear, benchmark_iterations_); 446 EXPECT_LE(diff, 10); 447 } 448 449 } // namespace libyuv 450