1 /* 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <stdlib.h> 12 #include <time.h> 13 14 #include "libyuv/cpu_id.h" 15 #include "libyuv/scale_argb.h" 16 #include "libyuv/row.h" 17 #include "../unit_test/unit_test.h" 18 19 namespace libyuv { 20 21 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. 22 static int ARGBTestFilter(int src_width, int src_height, 23 int dst_width, int dst_height, 24 FilterMode f, int benchmark_iterations) { 25 int i, j; 26 const int b = 0; // 128 to test for padding/stride. 27 int src_argb_plane_size = (Abs(src_width) + b * 2) * 28 (Abs(src_height) + b * 2) * 4; 29 int src_stride_argb = (b * 2 + Abs(src_width)) * 4; 30 31 align_buffer_page_end(src_argb, src_argb_plane_size); 32 srandom(time(NULL)); 33 MemRandomize(src_argb, src_argb_plane_size); 34 35 int dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4; 36 int dst_stride_argb = (b * 2 + dst_width) * 4; 37 38 align_buffer_page_end(dst_argb_c, dst_argb_plane_size); 39 align_buffer_page_end(dst_argb_opt, dst_argb_plane_size); 40 memset(dst_argb_c, 2, dst_argb_plane_size); 41 memset(dst_argb_opt, 3, dst_argb_plane_size); 42 43 // Warm up both versions for consistent benchmarks. 44 MaskCpuFlags(0); // Disable all CPU optimization. 45 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, 46 src_width, src_height, 47 dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb, 48 dst_width, dst_height, f); 49 MaskCpuFlags(-1); // Enable all CPU optimization. 50 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, 51 src_width, src_height, 52 dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, 53 dst_width, dst_height, f); 54 55 MaskCpuFlags(0); // Disable all CPU optimization. 56 double c_time = get_time(); 57 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, 58 src_width, src_height, 59 dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb, 60 dst_width, dst_height, f); 61 62 c_time = (get_time() - c_time); 63 64 MaskCpuFlags(-1); // Enable all CPU optimization. 65 double opt_time = get_time(); 66 for (i = 0; i < benchmark_iterations; ++i) { 67 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, 68 src_width, src_height, 69 dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, 70 dst_width, dst_height, f); 71 } 72 opt_time = (get_time() - opt_time) / benchmark_iterations; 73 74 // Report performance of C vs OPT 75 printf("filter %d - %8d us C - %8d us OPT\n", 76 f, static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); 77 78 // C version may be a little off from the optimized. Order of 79 // operations may introduce rounding somewhere. So do a difference 80 // of the buffers and look to see that the max difference isn't 81 // over 2. 82 int max_diff = 0; 83 for (i = b; i < (dst_height + b); ++i) { 84 for (j = b * 4; j < (dst_width + b) * 4; ++j) { 85 int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] - 86 dst_argb_opt[(i * dst_stride_argb) + j]); 87 if (abs_diff > max_diff) { 88 max_diff = abs_diff; 89 } 90 } 91 } 92 93 free_aligned_buffer_page_end(dst_argb_c); 94 free_aligned_buffer_page_end(dst_argb_opt); 95 free_aligned_buffer_page_end(src_argb); 96 return max_diff; 97 } 98 99 static const int kTileX = 8; 100 static const int kTileY = 8; 101 102 static int TileARGBScale(const uint8* src_argb, int src_stride_argb, 103 int src_width, int src_height, 104 uint8* dst_argb, int dst_stride_argb, 105 int dst_width, int dst_height, 106 FilterMode filtering) { 107 for (int y = 0; y < dst_height; y += kTileY) { 108 for (int x = 0; x < dst_width; x += kTileX) { 109 int clip_width = kTileX; 110 if (x + clip_width > dst_width) { 111 clip_width = dst_width - x; 112 } 113 int clip_height = kTileY; 114 if (y + clip_height > dst_height) { 115 clip_height = dst_height - y; 116 } 117 int r = ARGBScaleClip(src_argb, src_stride_argb, 118 src_width, src_height, 119 dst_argb, dst_stride_argb, 120 dst_width, dst_height, 121 x, y, clip_width, clip_height, filtering); 122 if (r) { 123 return r; 124 } 125 } 126 } 127 return 0; 128 } 129 130 static int ARGBClipTestFilter(int src_width, int src_height, 131 int dst_width, int dst_height, 132 FilterMode f, int benchmark_iterations) { 133 const int b = 128; 134 int src_argb_plane_size = (Abs(src_width) + b * 2) * 135 (Abs(src_height) + b * 2) * 4; 136 int src_stride_argb = (b * 2 + Abs(src_width)) * 4; 137 138 align_buffer_64(src_argb, src_argb_plane_size); 139 memset(src_argb, 1, src_argb_plane_size); 140 141 int dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4; 142 int dst_stride_argb = (b * 2 + dst_width) * 4; 143 144 srandom(time(NULL)); 145 146 int i, j; 147 for (i = b; i < (Abs(src_height) + b); ++i) { 148 for (j = b; j < (Abs(src_width) + b) * 4; ++j) { 149 src_argb[(i * src_stride_argb) + j] = (random() & 0xff); 150 } 151 } 152 153 align_buffer_64(dst_argb_c, dst_argb_plane_size); 154 align_buffer_64(dst_argb_opt, dst_argb_plane_size); 155 memset(dst_argb_c, 2, dst_argb_plane_size); 156 memset(dst_argb_opt, 3, dst_argb_plane_size); 157 158 // Do full image, no clipping. 159 double c_time = get_time(); 160 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, 161 src_width, src_height, 162 dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb, 163 dst_width, dst_height, f); 164 c_time = (get_time() - c_time); 165 166 // Do tiled image, clipping scale to a tile at a time. 167 double opt_time = get_time(); 168 for (i = 0; i < benchmark_iterations; ++i) { 169 TileARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, 170 src_width, src_height, 171 dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, 172 dst_width, dst_height, f); 173 } 174 opt_time = (get_time() - opt_time) / benchmark_iterations; 175 176 // Report performance of Full vs Tiled. 177 printf("filter %d - %8d us Full - %8d us Tiled\n", 178 f, static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); 179 180 // Compare full scaled image vs tiled image. 181 int max_diff = 0; 182 for (i = b; i < (dst_height + b); ++i) { 183 for (j = b * 4; j < (dst_width + b) * 4; ++j) { 184 int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] - 185 dst_argb_opt[(i * dst_stride_argb) + j]); 186 if (abs_diff > max_diff) { 187 max_diff = abs_diff; 188 } 189 } 190 } 191 192 free_aligned_buffer_64(dst_argb_c); 193 free_aligned_buffer_64(dst_argb_opt); 194 free_aligned_buffer_64(src_argb); 195 return max_diff; 196 } 197 198 #define TEST_FACTOR1(name, filter, hfactor, vfactor, max_diff) \ 199 TEST_F(libyuvTest, ARGBScaleDownBy##name##_##filter) { \ 200 int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, \ 201 Abs(benchmark_width_) * hfactor, \ 202 Abs(benchmark_height_) * vfactor, \ 203 kFilter##filter, benchmark_iterations_); \ 204 EXPECT_LE(diff, max_diff); \ 205 } \ 206 TEST_F(libyuvTest, ARGBScaleDownClipBy##name##_##filter) { \ 207 int diff = ARGBClipTestFilter(benchmark_width_, benchmark_height_, \ 208 Abs(benchmark_width_) * hfactor, \ 209 Abs(benchmark_height_) * vfactor, \ 210 kFilter##filter, benchmark_iterations_); \ 211 EXPECT_LE(diff, max_diff); \ 212 } 213 214 // Test a scale factor with 2 filters. Expect unfiltered to be exact, but 215 // filtering is different fixed point implementations for SSSE3, Neon and C. 216 #define TEST_FACTOR(name, hfactor, vfactor) \ 217 TEST_FACTOR1(name, None, hfactor, vfactor, 2) \ 218 TEST_FACTOR1(name, Linear, hfactor, vfactor, 2) \ 219 TEST_FACTOR1(name, Bilinear, hfactor, vfactor, 2) \ 220 TEST_FACTOR1(name, Box, hfactor, vfactor, 2) 221 222 TEST_FACTOR(2, 1 / 2, 1 / 2) 223 TEST_FACTOR(4, 1 / 4, 1 / 4) 224 TEST_FACTOR(8, 1 / 8, 1 / 8) 225 TEST_FACTOR(3by4, 3 / 4, 3 / 4) 226 #undef TEST_FACTOR1 227 #undef TEST_FACTOR 228 229 #define TEST_SCALETO1(name, width, height, filter, max_diff) \ 230 TEST_F(libyuvTest, name##To##width##x##height##_##filter) { \ 231 int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, \ 232 width, height, \ 233 kFilter##filter, benchmark_iterations_); \ 234 EXPECT_LE(diff, max_diff); \ 235 } \ 236 TEST_F(libyuvTest, name##From##width##x##height##_##filter) { \ 237 int diff = ARGBTestFilter(width, height, \ 238 Abs(benchmark_width_), Abs(benchmark_height_), \ 239 kFilter##filter, benchmark_iterations_); \ 240 EXPECT_LE(diff, max_diff); \ 241 } \ 242 TEST_F(libyuvTest, name##ClipTo##width##x##height##_##filter) { \ 243 int diff = ARGBClipTestFilter(benchmark_width_, benchmark_height_, \ 244 width, height, \ 245 kFilter##filter, benchmark_iterations_); \ 246 EXPECT_LE(diff, max_diff); \ 247 } \ 248 TEST_F(libyuvTest, name##ClipFrom##width##x##height##_##filter) { \ 249 int diff = ARGBClipTestFilter(width, height, \ 250 Abs(benchmark_width_), Abs(benchmark_height_), \ 251 kFilter##filter, benchmark_iterations_); \ 252 EXPECT_LE(diff, max_diff); \ 253 } 254 255 /// Test scale to a specified size with all 4 filters. 256 #define TEST_SCALETO(name, width, height) \ 257 TEST_SCALETO1(name, width, height, None, 0) \ 258 TEST_SCALETO1(name, width, height, Linear, 3) \ 259 TEST_SCALETO1(name, width, height, Bilinear, 3) \ 260 TEST_SCALETO1(name, width, height, Box, 3) 261 262 TEST_SCALETO(ARGBScale, 1, 1) 263 TEST_SCALETO(ARGBScale, 320, 240) 264 TEST_SCALETO(ARGBScale, 352, 288) 265 TEST_SCALETO(ARGBScale, 569, 480) 266 TEST_SCALETO(ARGBScale, 640, 360) 267 TEST_SCALETO(ARGBScale, 1280, 720) 268 #undef TEST_SCALETO1 269 #undef TEST_SCALETO 270 271 } // namespace libyuv 272