1 /* 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "libyuv/compare.h" 12 13 #include <float.h> 14 #include <math.h> 15 #ifdef _OPENMP 16 #include <omp.h> 17 #endif 18 19 #include "libyuv/basic_types.h" 20 #include "libyuv/compare_row.h" 21 #include "libyuv/cpu_id.h" 22 #include "libyuv/row.h" 23 #include "libyuv/video_common.h" 24 25 #ifdef __cplusplus 26 namespace libyuv { 27 extern "C" { 28 #endif 29 30 // hash seed of 5381 recommended. 31 LIBYUV_API 32 uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { 33 const int kBlockSize = 1 << 15; // 32768; 34 int remainder; 35 uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C; 36 #if defined(HAS_HASHDJB2_SSE41) 37 if (TestCpuFlag(kCpuHasSSE41)) { 38 HashDjb2_SSE = HashDjb2_SSE41; 39 } 40 #endif 41 #if defined(HAS_HASHDJB2_AVX2) 42 if (TestCpuFlag(kCpuHasAVX2)) { 43 HashDjb2_SSE = HashDjb2_AVX2; 44 } 45 #endif 46 47 while (count >= (uint64)(kBlockSize)) { 48 seed = HashDjb2_SSE(src, kBlockSize, seed); 49 src += kBlockSize; 50 count -= kBlockSize; 51 } 52 remainder = (int)count & ~15; 53 if (remainder) { 54 seed = HashDjb2_SSE(src, remainder, seed); 55 src += remainder; 56 count -= remainder; 57 } 58 remainder = (int)count & 15; 59 if (remainder) { 60 seed = HashDjb2_C(src, remainder, seed); 61 } 62 return seed; 63 } 64 65 static uint32 ARGBDetectRow_C(const uint8* argb, int width) { 66 int x; 67 for (x = 0; x < width - 1; x += 2) { 68 if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB. 69 return FOURCC_BGRA; 70 } 71 if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA. 72 return FOURCC_ARGB; 73 } 74 if (argb[4] != 255) { // Second pixel first byte is not Alpha of 255. 75 return FOURCC_BGRA; 76 } 77 if (argb[7] != 255) { // Second pixel 4th byte is not Alpha of 255. 78 return FOURCC_ARGB; 79 } 80 argb += 8; 81 } 82 if (width & 1) { 83 if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB. 84 return FOURCC_BGRA; 85 } 86 if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA. 87 return FOURCC_ARGB; 88 } 89 } 90 return 0; 91 } 92 93 // Scan an opaque argb image and return fourcc based on alpha offset. 94 // Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown. 95 LIBYUV_API 96 uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) { 97 uint32 fourcc = 0; 98 int h; 99 100 // Coalesce rows. 101 if (stride_argb == width * 4) { 102 width *= height; 103 height = 1; 104 stride_argb = 0; 105 } 106 for (h = 0; h < height && fourcc == 0; ++h) { 107 fourcc = ARGBDetectRow_C(argb, width); 108 argb += stride_argb; 109 } 110 return fourcc; 111 } 112 113 // TODO(fbarchard): Refactor into row function. 114 LIBYUV_API 115 uint64 ComputeSumSquareError(const uint8* src_a, 116 const uint8* src_b, 117 int count) { 118 // SumSquareError returns values 0 to 65535 for each squared difference. 119 // Up to 65536 of those can be summed and remain within a uint32. 120 // After each block of 65536 pixels, accumulate into a uint64. 121 const int kBlockSize = 65536; 122 int remainder = count & (kBlockSize - 1) & ~31; 123 uint64 sse = 0; 124 int i; 125 uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) = 126 SumSquareError_C; 127 #if defined(HAS_SUMSQUAREERROR_NEON) 128 if (TestCpuFlag(kCpuHasNEON)) { 129 SumSquareError = SumSquareError_NEON; 130 } 131 #endif 132 #if defined(HAS_SUMSQUAREERROR_SSE2) 133 if (TestCpuFlag(kCpuHasSSE2)) { 134 // Note only used for multiples of 16 so count is not checked. 135 SumSquareError = SumSquareError_SSE2; 136 } 137 #endif 138 #if defined(HAS_SUMSQUAREERROR_AVX2) 139 if (TestCpuFlag(kCpuHasAVX2)) { 140 // Note only used for multiples of 32 so count is not checked. 141 SumSquareError = SumSquareError_AVX2; 142 } 143 #endif 144 #ifdef _OPENMP 145 #pragma omp parallel for reduction(+ : sse) 146 #endif 147 for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) { 148 sse += SumSquareError(src_a + i, src_b + i, kBlockSize); 149 } 150 src_a += count & ~(kBlockSize - 1); 151 src_b += count & ~(kBlockSize - 1); 152 if (remainder) { 153 sse += SumSquareError(src_a, src_b, remainder); 154 src_a += remainder; 155 src_b += remainder; 156 } 157 remainder = count & 31; 158 if (remainder) { 159 sse += SumSquareError_C(src_a, src_b, remainder); 160 } 161 return sse; 162 } 163 164 LIBYUV_API 165 uint64 ComputeSumSquareErrorPlane(const uint8* src_a, 166 int stride_a, 167 const uint8* src_b, 168 int stride_b, 169 int width, 170 int height) { 171 uint64 sse = 0; 172 int h; 173 // Coalesce rows. 174 if (stride_a == width && stride_b == width) { 175 width *= height; 176 height = 1; 177 stride_a = stride_b = 0; 178 } 179 for (h = 0; h < height; ++h) { 180 sse += ComputeSumSquareError(src_a, src_b, width); 181 src_a += stride_a; 182 src_b += stride_b; 183 } 184 return sse; 185 } 186 187 LIBYUV_API 188 double SumSquareErrorToPsnr(uint64 sse, uint64 count) { 189 double psnr; 190 if (sse > 0) { 191 double mse = (double)count / (double)sse; 192 psnr = 10.0 * log10(255.0 * 255.0 * mse); 193 } else { 194 psnr = kMaxPsnr; // Limit to prevent divide by 0 195 } 196 197 if (psnr > kMaxPsnr) 198 psnr = kMaxPsnr; 199 200 return psnr; 201 } 202 203 LIBYUV_API 204 double CalcFramePsnr(const uint8* src_a, 205 int stride_a, 206 const uint8* src_b, 207 int stride_b, 208 int width, 209 int height) { 210 const uint64 samples = width * height; 211 const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a, src_b, 212 stride_b, width, height); 213 return SumSquareErrorToPsnr(sse, samples); 214 } 215 216 LIBYUV_API 217 double I420Psnr(const uint8* src_y_a, 218 int stride_y_a, 219 const uint8* src_u_a, 220 int stride_u_a, 221 const uint8* src_v_a, 222 int stride_v_a, 223 const uint8* src_y_b, 224 int stride_y_b, 225 const uint8* src_u_b, 226 int stride_u_b, 227 const uint8* src_v_b, 228 int stride_v_b, 229 int width, 230 int height) { 231 const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a, src_y_b, 232 stride_y_b, width, height); 233 const int width_uv = (width + 1) >> 1; 234 const int height_uv = (height + 1) >> 1; 235 const uint64 sse_u = ComputeSumSquareErrorPlane( 236 src_u_a, stride_u_a, src_u_b, stride_u_b, width_uv, height_uv); 237 const uint64 sse_v = ComputeSumSquareErrorPlane( 238 src_v_a, stride_v_a, src_v_b, stride_v_b, width_uv, height_uv); 239 const uint64 samples = width * height + 2 * (width_uv * height_uv); 240 const uint64 sse = sse_y + sse_u + sse_v; 241 return SumSquareErrorToPsnr(sse, samples); 242 } 243 244 static const int64 cc1 = 26634; // (64^2*(.01*255)^2 245 static const int64 cc2 = 239708; // (64^2*(.03*255)^2 246 247 static double Ssim8x8_C(const uint8* src_a, 248 int stride_a, 249 const uint8* src_b, 250 int stride_b) { 251 int64 sum_a = 0; 252 int64 sum_b = 0; 253 int64 sum_sq_a = 0; 254 int64 sum_sq_b = 0; 255 int64 sum_axb = 0; 256 257 int i; 258 for (i = 0; i < 8; ++i) { 259 int j; 260 for (j = 0; j < 8; ++j) { 261 sum_a += src_a[j]; 262 sum_b += src_b[j]; 263 sum_sq_a += src_a[j] * src_a[j]; 264 sum_sq_b += src_b[j] * src_b[j]; 265 sum_axb += src_a[j] * src_b[j]; 266 } 267 268 src_a += stride_a; 269 src_b += stride_b; 270 } 271 272 { 273 const int64 count = 64; 274 // scale the constants by number of pixels 275 const int64 c1 = (cc1 * count * count) >> 12; 276 const int64 c2 = (cc2 * count * count) >> 12; 277 278 const int64 sum_a_x_sum_b = sum_a * sum_b; 279 280 const int64 ssim_n = (2 * sum_a_x_sum_b + c1) * 281 (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2); 282 283 const int64 sum_a_sq = sum_a * sum_a; 284 const int64 sum_b_sq = sum_b * sum_b; 285 286 const int64 ssim_d = 287 (sum_a_sq + sum_b_sq + c1) * 288 (count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2); 289 290 if (ssim_d == 0.0) { 291 return DBL_MAX; 292 } 293 return ssim_n * 1.0 / ssim_d; 294 } 295 } 296 297 // We are using a 8x8 moving window with starting location of each 8x8 window 298 // on the 4x4 pixel grid. Such arrangement allows the windows to overlap 299 // block boundaries to penalize blocking artifacts. 300 LIBYUV_API 301 double CalcFrameSsim(const uint8* src_a, 302 int stride_a, 303 const uint8* src_b, 304 int stride_b, 305 int width, 306 int height) { 307 int samples = 0; 308 double ssim_total = 0; 309 double (*Ssim8x8)(const uint8* src_a, int stride_a, const uint8* src_b, 310 int stride_b) = Ssim8x8_C; 311 312 // sample point start with each 4x4 location 313 int i; 314 for (i = 0; i < height - 8; i += 4) { 315 int j; 316 for (j = 0; j < width - 8; j += 4) { 317 ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b); 318 samples++; 319 } 320 321 src_a += stride_a * 4; 322 src_b += stride_b * 4; 323 } 324 325 ssim_total /= samples; 326 return ssim_total; 327 } 328 329 LIBYUV_API 330 double I420Ssim(const uint8* src_y_a, 331 int stride_y_a, 332 const uint8* src_u_a, 333 int stride_u_a, 334 const uint8* src_v_a, 335 int stride_v_a, 336 const uint8* src_y_b, 337 int stride_y_b, 338 const uint8* src_u_b, 339 int stride_u_b, 340 const uint8* src_v_b, 341 int stride_v_b, 342 int width, 343 int height) { 344 const double ssim_y = 345 CalcFrameSsim(src_y_a, stride_y_a, src_y_b, stride_y_b, width, height); 346 const int width_uv = (width + 1) >> 1; 347 const int height_uv = (height + 1) >> 1; 348 const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a, src_u_b, stride_u_b, 349 width_uv, height_uv); 350 const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a, src_v_b, stride_v_b, 351 width_uv, height_uv); 352 return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v); 353 } 354 355 #ifdef __cplusplus 356 } // extern "C" 357 } // namespace libyuv 358 #endif 359