1 // Copyright 2011 Google Inc. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the COPYING file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 // ----------------------------------------------------------------------------- 9 // 10 // Speed-critical encoding functions. 11 // 12 // Author: Skal (pascal.massimino (at) gmail.com) 13 14 #include <assert.h> 15 #include <stdlib.h> // for abs() 16 17 #include "./dsp.h" 18 #include "../enc/vp8enci.h" 19 20 static WEBP_INLINE uint8_t clip_8b(int v) { 21 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; 22 } 23 24 static WEBP_INLINE int clip_max(int v, int max) { 25 return (v > max) ? max : v; 26 } 27 28 //------------------------------------------------------------------------------ 29 // Compute susceptibility based on DCT-coeff histograms: 30 // the higher, the "easier" the macroblock is to compress. 

// Offsets (x + y * BPS) of the top-left sample of each 4x4 block inside a
// work buffer whose row stride is BPS: 16 entries for luma, then 4 for U and
// 4 for V.
const int VP8DspScan[16 + 4 + 4] = {
  // Luma
  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,

  0 + 0 * BPS,   4 + 0 * BPS, 0 + 4 * BPS,  4 + 4 * BPS,    // U
  8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
};

// general-purpose util function
// Summarizes 'distribution' (MAX_COEFF_THRESH + 1 bins) into 'histo':
//  - max_value:     the largest strictly positive bin count (0 if none),
//  - last_non_zero: the highest bin index holding a positive count; it stays
//                   at its initial value of 1 when no bin is positive.
void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
                         VP8Histogram* const histo) {
  int max_value = 0, last_non_zero = 1;
  int k;
  for (k = 0; k <= MAX_COEFF_THRESH; ++k) {
    const int value = distribution[k];
    if (value > 0) {
      if (value > max_value) max_value = value;
      last_non_zero = k;
    }
  }
  histo->max_value = max_value;
  histo->last_non_zero = last_non_zero;
}

// Builds a histogram of coefficient magnitudes for the 4x4 blocks with index
// in [start_block, end_block) (indices into VP8DspScan[]). Each block is the
// forward transform of 'ref' minus 'pred' at the block's offset. The result is
// summarized into 'histo' through VP8SetHistogramData().
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
                             int start_block, int end_block,
                             VP8Histogram* const histo) {
  int j;
  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
  for (j = start_block; j < end_block; ++j) {
    int k;
    int16_t out[16];

    // Goes through the function pointer, so an optimized transform may run.
    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);

    // Convert coefficients to bin.
    for (k = 0; k < 16; ++k) {
      const int v = abs(out[k]) >> 3;  // TODO(skal): add rounding?
      // Everything above MAX_COEFF_THRESH is counted in the last bin.
      const int clipped_value = clip_max(v, MAX_COEFF_THRESH);
      ++distribution[clipped_value];
    }
  }
  VP8SetHistogramData(distribution, histo);
}

//------------------------------------------------------------------------------
// run-time tables

// clip1[255 + i] == clip_8b(i) for i in [-255, 510] (766 entries).
static uint8_t clip1[255 + 510 + 1];    // clips [-255,510] to [0,255]

// We declare this variable 'volatile' to prevent instruction reordering
// and make sure it's set to true _last_ (so as to be thread-safe)
static volatile int tables_ok = 0;

// Fills clip1[] the first time it is called; later calls see tables_ok set
// and do nothing.
static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
  if (!tables_ok) {
    int i;
    for (i = -255; i <= 255 + 255; ++i) {
      clip1[255 + i] = clip_8b(i);
    }
    tables_ok = 1;
  }
}


//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)

// Writes dst(x, y) = clip_8b(ref(x, y) + (v >> 3)): the residual 'v' is scaled
// down by 8, added to the reference sample and saturated to 8 bits.
// Relies on 'ref' and 'dst' being in scope at the expansion site.
#define STORE(x, y, v) \
  dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))

// Fixed-point multipliers for MUL() below, which keeps the product's bits
// above the low 16. kC1 carries an extra (1 << 16) on top of 20091.
static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;
#define MUL(a, b) (((a) * (b)) >> 16)

// Inverse-transforms the 16 coefficients in 'in', adds the result to the 4x4
// block at 'ref' and stores the clipped sum in the 4x4 block at 'dst'
// (both with row stride BPS).
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
                                      uint8_t* dst) {
  int C[4 * 4], *tmp;
  int i;
  tmp = C;
  // First pass: iteration i combines in[i], in[4 + i], in[8 + i], in[12 + i]
  // and writes four consecutive intermediate values.
  for (i = 0; i < 4; ++i) {    // vertical pass
    const int a = in[0] + in[8];
    const int b = in[0] - in[8];
    const int c = MUL(in[4], kC2) - MUL(in[12], kC1);
    const int d = MUL(in[4], kC1) + MUL(in[12], kC2);
    tmp[0] = a + d;
    tmp[1] = b + c;
    tmp[2] = b - c;
    tmp[3] = a - d;
    tmp += 4;
    in++;
  }

  tmp = C;
  // Second pass: same butterfly across the intermediate values. The '+ 4' is
  // the rounding term for the '>> 3' performed inside STORE().
  for (i = 0; i < 4; ++i) {    // horizontal pass
    const int dc = tmp[0] + 4;
    const int a =  dc +  tmp[8];
    const int b =  dc -  tmp[8];
    const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1);
    const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2);
    STORE(0, i, a + d);
    STORE(1, i, b + c);
    STORE(2, i, b - c);
    STORE(3, i, a - d);
    tmp++;
  }
}

// Inverse-transforms one 4x4 block, or two horizontally adjacent ones when
// 'do_two' is non-zero (second block: 4 samples to the right, next 16 coeffs).
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
                       int do_two) {
  ITransformOne(ref, in, dst);
  if (do_two) {
    ITransformOne(ref + 4, in + 16, dst + 4);
  }
}

// Forward transform of the 4x4 difference block 'src' minus 'ref' (both with
// row stride BPS). Writes 16 coefficients to 'out'. The rounding constants
// and shifts below define the exact output and must not be altered.
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
  int i;
  int tmp[16];
  // Pass 1: one row of differences per iteration.
  for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
    const int d0 = src[0] - ref[0];   // 9bit dynamic range ([-255,255])
    const int d1 = src[1] - ref[1];
    const int d2 = src[2] - ref[2];
    const int d3 = src[3] - ref[3];
    const int a0 = (d0 + d3);         // 10b                      [-510,510]
    const int a1 = (d1 + d2);
    const int a2 = (d1 - d2);
    const int a3 = (d0 - d3);
    tmp[0 + i * 4] = (a0 + a1) * 8;   // 14b                      [-8160,8160]
    tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9;      // [-7536,7542]
    tmp[2 + i * 4] = (a0 - a1) * 8;
    tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 +  937) >> 9;
  }
  // Pass 2: one column of intermediate values per iteration.
  for (i = 0; i < 4; ++i) {
    const int a0 = (tmp[0 + i] + tmp[12 + i]);  // 15b
    const int a1 = (tmp[4 + i] + tmp[ 8 + i]);
    const int a2 = (tmp[4 + i] - tmp[ 8 + i]);
    const int a3 = (tmp[0 + i] - tmp[12 + i]);
    out[0 + i] = (a0 + a1 + 7) >> 4;            // 12b
    out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);
    out[8 + i] = (a0 - a1 + 7) >> 4;
    out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
  }
}

// Forward-transforms two horizontally adjacent 4x4 blocks into out[0..15] and
// out[16..31]. Calls go through the VP8FTransform pointer, not FTransform()
// directly.
static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
  VP8FTransform(src, ref, out);
  VP8FTransform(src + 4, ref + 4, out + 16);
}

// Hadamard-style 4x4 transform of 16 values gathered from 'in' with a stride
// of 16 entries: in[0], in[16], ..., in[240]. Writes 16 results to 'out'.
static void FTransformWHT(const int16_t* in, int16_t* out) {
  // input is 12b signed
  int32_t tmp[16];
  int i;
  // Each iteration consumes four gathered inputs, then skips 64 entries.
  for (i = 0; i < 4; ++i, in += 64) {
    const int a0 = (in[0 * 16] + in[2 * 16]);  // 13b
    const int a1 = (in[1 * 16] + in[3 * 16]);
    const int a2 = (in[1 * 16] - in[3 * 16]);
    const int a3 = (in[0 * 16] - in[2 * 16]);
    tmp[0 + i * 4] = a0 + a1;   // 14b
    tmp[1 + i * 4] = a3 + a2;
    tmp[2 + i * 4] = a3 - a2;
    tmp[3 + i * 4] = a0 - a1;
  }
  for (i = 0; i < 4; ++i) {
    const int a0 = (tmp[0 + i] + tmp[8 + i]);  // 15b
    const int a1 = (tmp[4 + i] + tmp[12+ i]);
    const int a2 = (tmp[4 + i] - tmp[12+ i]);
    const int a3 = (tmp[0 + i] - tmp[8 + i]);
    const int b0 = a0 + a1;    // 16b
    const int b1 = a3 + a2;
    const int b2 = a3 - a2;
    const int b3 = a0 - a1;
    // Halve the result so it fits the int16_t output.
    out[ 0 + i] = b0 >> 1;     // 15b
    out[ 4 + i] = b1 >> 1;
    out[ 8 + i] = b2 >> 1;
    out[12 + i] = b3 >> 1;
  }
}

#undef MUL
#undef STORE

//------------------------------------------------------------------------------
// Intra predictions

// Sets the size x size block at 'dst' (row stride BPS) to 'value'.
static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
  int j;
  for (j = 0; j < size; ++j) {
    memset(dst + j * BPS, value, size);
  }
}

// Vertical prediction: every row is a copy of the 'size' samples at 'top'.
// When 'top' is NULL the block is filled with 127 instead.
static WEBP_INLINE void VerticalPred(uint8_t* dst,
                                     const uint8_t* top, int size) {
  int j;
  if (top != NULL) {
    for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size);
  } else {
    Fill(dst, 127, size);
  }
}

// Horizontal prediction: row j is filled with left[j].
// When 'left' is NULL the block is filled with 129 instead.
static WEBP_INLINE void HorizontalPred(uint8_t* dst,
                                       const uint8_t* left, int size) {
  if (left != NULL) {
    int j;
    for (j = 0; j < size; ++j) {
      memset(dst + j * BPS, left[j], size);
    }
  } else {
    Fill(dst, 129, size);
  }
}

// TrueMotion prediction: dst(x, y) = clip(top[x] + left[y] - left[-1]), with
// the clipping done through the clip1[] table (InitTables() must have run).
// Falls back to simpler predictors when 'left' and/or 'top' is NULL.
static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
                                   const uint8_t* top, int size) {
  int y;
  if (left != NULL) {
    if (top != NULL) {
      // left[-1] is read as the top-left sample; it is folded into the table
      // base so that the inner loop is a single lookup.
      const uint8_t* const clip = clip1 + 255 - left[-1];
      for (y = 0; y < size; ++y) {
        const uint8_t* const clip_table = clip + left[y];
        int x;
        for (x = 0; x < size; ++x) {
          dst[x] = clip_table[top[x]];
        }
        dst += BPS;
      }
    } else {
      HorizontalPred(dst, left, size);
    }
  } else {
    // true motion without left samples (hence: with default 129 value)
    // is equivalent to VE prediction where you just copy the top samples.
    // Note that if top samples are not available, the default value is
    // then 129, and not 127 as in the VerticalPred case.
    if (top != NULL) {
      VerticalPred(dst, top, size);
    } else {
      Fill(dst, 129, size);
    }
  }
}

// DC prediction: fills the block with (sum + round) >> shift, where 'sum' adds
// 'size' samples from 'top' and 'size' from 'left'. If only one of the two is
// available its sum is doubled. If neither is, the block is filled with 0x80.
static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
                               const uint8_t* top,
                               int size, int round, int shift) {
  int DC = 0;
  int j;
  if (top != NULL) {
    for (j = 0; j < size; ++j) DC += top[j];
    if (left != NULL) {   // top and left present
      for (j = 0; j < size; ++j) DC += left[j];
    } else {      // top, but no left
      DC += DC;
    }
    DC = (DC + round) >> shift;
  } else if (left != NULL) {   // left but no top
    for (j = 0; j < size; ++j) DC += left[j];
    DC += DC;
    DC = (DC + round) >> shift;
  } else {   // no top, no left, nothing.
    DC = 0x80;
  }
  Fill(dst, DC, size);
}

//------------------------------------------------------------------------------
// Chroma 8x8 prediction (paragraph 12.2)

// Generates the four 8x8 chroma predictions (DC, vertical, horizontal,
// TrueMotion) for U and then for V. Each goes to its own offset
// (C8DC8, C8VE8, C8HE8, C8TM8) from 'dst'. 'left' and/or 'top' may be NULL.
static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
                             const uint8_t* top) {
  // U block
  DCMode(C8DC8 + dst, left, top, 8, 8, 4);
  VerticalPred(C8VE8 + dst, top, 8);
  HorizontalPred(C8HE8 + dst, left, 8);
  TrueMotion(C8TM8 + dst, left, top, 8);
  // V block
  // V output sits 8 samples to the right of U. Its context samples are read
  // 8 bytes further in 'top' and 16 bytes further in 'left'.
  dst += 8;
  if (top != NULL) top += 8;
  if (left != NULL) left += 16;
  DCMode(C8DC8 + dst, left, top, 8, 8, 4);
  VerticalPred(C8VE8 + dst, top, 8);
  HorizontalPred(C8HE8 + dst, left, 8);
  TrueMotion(C8TM8 + dst, left, top, 8);
}

//------------------------------------------------------------------------------
// luma 16x16 prediction (paragraph 12.3)

// Generates the four 16x16 luma predictions (DC, vertical, horizontal,
// TrueMotion) at offsets I16DC16, I16VE16, I16HE16 and I16TM16 from 'dst'.
// 'left' and/or 'top' may be NULL.
static void Intra16Preds(uint8_t* dst,
                         const uint8_t* left, const uint8_t* top) {
  DCMode(I16DC16 + dst, left, top, 16, 16, 5);
  VerticalPred(I16VE16 + dst, top, 16);
  HorizontalPred(I16HE16 + dst, left, 16);
  TrueMotion(I16TM16 + dst, left, top, 16);
}

334 //------------------------------------------------------------------------------ 335 // luma 4x4 prediction 336 337 #define DST(x, y) dst[(x) + (y) * BPS] 338 #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) 339 #define AVG2(a, b) (((a) + (b) + 1) >> 1) 340 341 static void VE4(uint8_t* dst, const uint8_t* top) { // vertical 342 const uint8_t vals[4] = { 343 AVG3(top[-1], top[0], top[1]), 344 AVG3(top[ 0], top[1], top[2]), 345 AVG3(top[ 1], top[2], top[3]), 346 AVG3(top[ 2], top[3], top[4]) 347 }; 348 int i; 349 for (i = 0; i < 4; ++i) { 350 memcpy(dst + i * BPS, vals, 4); 351 } 352 } 353 354 static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal 355 const int X = top[-1]; 356 const int I = top[-2]; 357 const int J = top[-3]; 358 const int K = top[-4]; 359 const int L = top[-5]; 360 WebPUint32ToMem(dst + 0 * BPS, 0x01010101U * AVG3(X, I, J)); 361 WebPUint32ToMem(dst + 1 * BPS, 0x01010101U * AVG3(I, J, K)); 362 WebPUint32ToMem(dst + 2 * BPS, 0x01010101U * AVG3(J, K, L)); 363 WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L)); 364 } 365 366 static void DC4(uint8_t* dst, const uint8_t* top) { 367 uint32_t dc = 4; 368 int i; 369 for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i]; 370 Fill(dst, dc >> 3, 4); 371 } 372 373 static void RD4(uint8_t* dst, const uint8_t* top) { 374 const int X = top[-1]; 375 const int I = top[-2]; 376 const int J = top[-3]; 377 const int K = top[-4]; 378 const int L = top[-5]; 379 const int A = top[0]; 380 const int B = top[1]; 381 const int C = top[2]; 382 const int D = top[3]; 383 DST(0, 3) = AVG3(J, K, L); 384 DST(0, 2) = DST(1, 3) = AVG3(I, J, K); 385 DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X, I, J); 386 DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I); 387 DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B, A, X); 388 DST(2, 0) = DST(3, 1) = AVG3(C, B, A); 389 DST(3, 0) = AVG3(D, C, B); 390 } 391 392 static void LD4(uint8_t* dst, const uint8_t* top) { 393 const int A = top[0]; 394 const int B = top[1]; 
395 const int C = top[2]; 396 const int D = top[3]; 397 const int E = top[4]; 398 const int F = top[5]; 399 const int G = top[6]; 400 const int H = top[7]; 401 DST(0, 0) = AVG3(A, B, C); 402 DST(1, 0) = DST(0, 1) = AVG3(B, C, D); 403 DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); 404 DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); 405 DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); 406 DST(3, 2) = DST(2, 3) = AVG3(F, G, H); 407 DST(3, 3) = AVG3(G, H, H); 408 } 409 410 static void VR4(uint8_t* dst, const uint8_t* top) { 411 const int X = top[-1]; 412 const int I = top[-2]; 413 const int J = top[-3]; 414 const int K = top[-4]; 415 const int A = top[0]; 416 const int B = top[1]; 417 const int C = top[2]; 418 const int D = top[3]; 419 DST(0, 0) = DST(1, 2) = AVG2(X, A); 420 DST(1, 0) = DST(2, 2) = AVG2(A, B); 421 DST(2, 0) = DST(3, 2) = AVG2(B, C); 422 DST(3, 0) = AVG2(C, D); 423 424 DST(0, 3) = AVG3(K, J, I); 425 DST(0, 2) = AVG3(J, I, X); 426 DST(0, 1) = DST(1, 3) = AVG3(I, X, A); 427 DST(1, 1) = DST(2, 3) = AVG3(X, A, B); 428 DST(2, 1) = DST(3, 3) = AVG3(A, B, C); 429 DST(3, 1) = AVG3(B, C, D); 430 } 431 432 static void VL4(uint8_t* dst, const uint8_t* top) { 433 const int A = top[0]; 434 const int B = top[1]; 435 const int C = top[2]; 436 const int D = top[3]; 437 const int E = top[4]; 438 const int F = top[5]; 439 const int G = top[6]; 440 const int H = top[7]; 441 DST(0, 0) = AVG2(A, B); 442 DST(1, 0) = DST(0, 2) = AVG2(B, C); 443 DST(2, 0) = DST(1, 2) = AVG2(C, D); 444 DST(3, 0) = DST(2, 2) = AVG2(D, E); 445 446 DST(0, 1) = AVG3(A, B, C); 447 DST(1, 1) = DST(0, 3) = AVG3(B, C, D); 448 DST(2, 1) = DST(1, 3) = AVG3(C, D, E); 449 DST(3, 1) = DST(2, 3) = AVG3(D, E, F); 450 DST(3, 2) = AVG3(E, F, G); 451 DST(3, 3) = AVG3(F, G, H); 452 } 453 454 static void HU4(uint8_t* dst, const uint8_t* top) { 455 const int I = top[-2]; 456 const int J = top[-3]; 457 const int K = top[-4]; 458 const int L = top[-5]; 459 DST(0, 0) = AVG2(I, J); 460 DST(2, 0) = 
DST(0, 1) = AVG2(J, K); 461 DST(2, 1) = DST(0, 2) = AVG2(K, L); 462 DST(1, 0) = AVG3(I, J, K); 463 DST(3, 0) = DST(1, 1) = AVG3(J, K, L); 464 DST(3, 1) = DST(1, 2) = AVG3(K, L, L); 465 DST(3, 2) = DST(2, 2) = 466 DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; 467 } 468 469 static void HD4(uint8_t* dst, const uint8_t* top) { 470 const int X = top[-1]; 471 const int I = top[-2]; 472 const int J = top[-3]; 473 const int K = top[-4]; 474 const int L = top[-5]; 475 const int A = top[0]; 476 const int B = top[1]; 477 const int C = top[2]; 478 479 DST(0, 0) = DST(2, 1) = AVG2(I, X); 480 DST(0, 1) = DST(2, 2) = AVG2(J, I); 481 DST(0, 2) = DST(2, 3) = AVG2(K, J); 482 DST(0, 3) = AVG2(L, K); 483 484 DST(3, 0) = AVG3(A, B, C); 485 DST(2, 0) = AVG3(X, A, B); 486 DST(1, 0) = DST(3, 1) = AVG3(I, X, A); 487 DST(1, 1) = DST(3, 2) = AVG3(J, I, X); 488 DST(1, 2) = DST(3, 3) = AVG3(K, J, I); 489 DST(1, 3) = AVG3(L, K, J); 490 } 491 492 static void TM4(uint8_t* dst, const uint8_t* top) { 493 int x, y; 494 const uint8_t* const clip = clip1 + 255 - top[-1]; 495 for (y = 0; y < 4; ++y) { 496 const uint8_t* const clip_table = clip + top[-2 - y]; 497 for (x = 0; x < 4; ++x) { 498 dst[x] = clip_table[top[x]]; 499 } 500 dst += BPS; 501 } 502 } 503 504 #undef DST 505 #undef AVG3 506 #undef AVG2 507 508 // Left samples are top[-5 .. 
-2], top_left is top[-1], top are 509 // located at top[0..3], and top right is top[4..7] 510 static void Intra4Preds(uint8_t* dst, const uint8_t* top) { 511 DC4(I4DC4 + dst, top); 512 TM4(I4TM4 + dst, top); 513 VE4(I4VE4 + dst, top); 514 HE4(I4HE4 + dst, top); 515 RD4(I4RD4 + dst, top); 516 VR4(I4VR4 + dst, top); 517 LD4(I4LD4 + dst, top); 518 VL4(I4VL4 + dst, top); 519 HD4(I4HD4 + dst, top); 520 HU4(I4HU4 + dst, top); 521 } 522 523 //------------------------------------------------------------------------------ 524 // Metric 525 526 static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b, 527 int w, int h) { 528 int count = 0; 529 int y, x; 530 for (y = 0; y < h; ++y) { 531 for (x = 0; x < w; ++x) { 532 const int diff = (int)a[x] - b[x]; 533 count += diff * diff; 534 } 535 a += BPS; 536 b += BPS; 537 } 538 return count; 539 } 540 541 static int SSE16x16(const uint8_t* a, const uint8_t* b) { 542 return GetSSE(a, b, 16, 16); 543 } 544 static int SSE16x8(const uint8_t* a, const uint8_t* b) { 545 return GetSSE(a, b, 16, 8); 546 } 547 static int SSE8x8(const uint8_t* a, const uint8_t* b) { 548 return GetSSE(a, b, 8, 8); 549 } 550 static int SSE4x4(const uint8_t* a, const uint8_t* b) { 551 return GetSSE(a, b, 4, 4); 552 } 553 554 //------------------------------------------------------------------------------ 555 // Texture distortion 556 // 557 // We try to match the spectral content (weighted) between source and 558 // reconstructed samples. 559 560 // Hadamard transform 561 // Returns the weighted sum of the absolute value of transformed coefficients. 
562 static int TTransform(const uint8_t* in, const uint16_t* w) { 563 int sum = 0; 564 int tmp[16]; 565 int i; 566 // horizontal pass 567 for (i = 0; i < 4; ++i, in += BPS) { 568 const int a0 = in[0] + in[2]; 569 const int a1 = in[1] + in[3]; 570 const int a2 = in[1] - in[3]; 571 const int a3 = in[0] - in[2]; 572 tmp[0 + i * 4] = a0 + a1; 573 tmp[1 + i * 4] = a3 + a2; 574 tmp[2 + i * 4] = a3 - a2; 575 tmp[3 + i * 4] = a0 - a1; 576 } 577 // vertical pass 578 for (i = 0; i < 4; ++i, ++w) { 579 const int a0 = tmp[0 + i] + tmp[8 + i]; 580 const int a1 = tmp[4 + i] + tmp[12+ i]; 581 const int a2 = tmp[4 + i] - tmp[12+ i]; 582 const int a3 = tmp[0 + i] - tmp[8 + i]; 583 const int b0 = a0 + a1; 584 const int b1 = a3 + a2; 585 const int b2 = a3 - a2; 586 const int b3 = a0 - a1; 587 588 sum += w[ 0] * abs(b0); 589 sum += w[ 4] * abs(b1); 590 sum += w[ 8] * abs(b2); 591 sum += w[12] * abs(b3); 592 } 593 return sum; 594 } 595 596 static int Disto4x4(const uint8_t* const a, const uint8_t* const b, 597 const uint16_t* const w) { 598 const int sum1 = TTransform(a, w); 599 const int sum2 = TTransform(b, w); 600 return abs(sum2 - sum1) >> 5; 601 } 602 603 static int Disto16x16(const uint8_t* const a, const uint8_t* const b, 604 const uint16_t* const w) { 605 int D = 0; 606 int x, y; 607 for (y = 0; y < 16 * BPS; y += 4 * BPS) { 608 for (x = 0; x < 16; x += 4) { 609 D += Disto4x4(a + x + y, b + x + y, w); 610 } 611 } 612 return D; 613 } 614 615 //------------------------------------------------------------------------------ 616 // Quantization 617 // 618 619 static const uint8_t kZigzag[16] = { 620 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 621 }; 622 623 // Simple quantization 624 static int QuantizeBlock(int16_t in[16], int16_t out[16], 625 const VP8Matrix* const mtx) { 626 int last = -1; 627 int n; 628 for (n = 0; n < 16; ++n) { 629 const int j = kZigzag[n]; 630 const int sign = (in[j] < 0); 631 const uint32_t coeff = (sign ? 
-in[j] : in[j]) + mtx->sharpen_[j]; 632 if (coeff > mtx->zthresh_[j]) { 633 const uint32_t Q = mtx->q_[j]; 634 const uint32_t iQ = mtx->iq_[j]; 635 const uint32_t B = mtx->bias_[j]; 636 int level = QUANTDIV(coeff, iQ, B); 637 if (level > MAX_LEVEL) level = MAX_LEVEL; 638 if (sign) level = -level; 639 in[j] = level * Q; 640 out[n] = level; 641 if (level) last = n; 642 } else { 643 out[n] = 0; 644 in[j] = 0; 645 } 646 } 647 return (last >= 0); 648 } 649 650 static int Quantize2Blocks(int16_t in[32], int16_t out[32], 651 const VP8Matrix* const mtx) { 652 int nz; 653 nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0; 654 nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1; 655 return nz; 656 } 657 658 static int QuantizeBlockWHT(int16_t in[16], int16_t out[16], 659 const VP8Matrix* const mtx) { 660 int n, last = -1; 661 for (n = 0; n < 16; ++n) { 662 const int j = kZigzag[n]; 663 const int sign = (in[j] < 0); 664 const uint32_t coeff = sign ? -in[j] : in[j]; 665 assert(mtx->sharpen_[j] == 0); 666 if (coeff > mtx->zthresh_[j]) { 667 const uint32_t Q = mtx->q_[j]; 668 const uint32_t iQ = mtx->iq_[j]; 669 const uint32_t B = mtx->bias_[j]; 670 int level = QUANTDIV(coeff, iQ, B); 671 if (level > MAX_LEVEL) level = MAX_LEVEL; 672 if (sign) level = -level; 673 in[j] = level * Q; 674 out[n] = level; 675 if (level) last = n; 676 } else { 677 out[n] = 0; 678 in[j] = 0; 679 } 680 } 681 return (last >= 0); 682 } 683 684 //------------------------------------------------------------------------------ 685 // Block copy 686 687 static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) { 688 int y; 689 for (y = 0; y < h; ++y) { 690 memcpy(dst, src, w); 691 src += BPS; 692 dst += BPS; 693 } 694 } 695 696 static void Copy4x4(const uint8_t* src, uint8_t* dst) { 697 Copy(src, dst, 4, 4); 698 } 699 700 static void Copy16x8(const uint8_t* src, uint8_t* dst) { 701 Copy(src, dst, 16, 8); 702 } 703 704 
//------------------------------------------------------------------------------ 705 // Initialization 706 707 // Speed-critical function pointers. We have to initialize them to the default 708 // implementations within VP8EncDspInit(). 709 VP8CHisto VP8CollectHistogram; 710 VP8Idct VP8ITransform; 711 VP8Fdct VP8FTransform; 712 VP8Fdct VP8FTransform2; 713 VP8WHT VP8FTransformWHT; 714 VP8Intra4Preds VP8EncPredLuma4; 715 VP8IntraPreds VP8EncPredLuma16; 716 VP8IntraPreds VP8EncPredChroma8; 717 VP8Metric VP8SSE16x16; 718 VP8Metric VP8SSE8x8; 719 VP8Metric VP8SSE16x8; 720 VP8Metric VP8SSE4x4; 721 VP8WMetric VP8TDisto4x4; 722 VP8WMetric VP8TDisto16x16; 723 VP8QuantizeBlock VP8EncQuantizeBlock; 724 VP8Quantize2Blocks VP8EncQuantize2Blocks; 725 VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT; 726 VP8BlockCopy VP8Copy4x4; 727 VP8BlockCopy VP8Copy16x8; 728 729 extern void VP8EncDspInitSSE2(void); 730 extern void VP8EncDspInitSSE41(void); 731 extern void VP8EncDspInitAVX2(void); 732 extern void VP8EncDspInitNEON(void); 733 extern void VP8EncDspInitMIPS32(void); 734 extern void VP8EncDspInitMIPSdspR2(void); 735 736 static volatile VP8CPUInfo enc_last_cpuinfo_used = 737 (VP8CPUInfo)&enc_last_cpuinfo_used; 738 739 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) { 740 if (enc_last_cpuinfo_used == VP8GetCPUInfo) return; 741 742 VP8DspInit(); // common inverse transforms 743 InitTables(); 744 745 // default C implementations 746 VP8CollectHistogram = CollectHistogram; 747 VP8ITransform = ITransform; 748 VP8FTransform = FTransform; 749 VP8FTransform2 = FTransform2; 750 VP8FTransformWHT = FTransformWHT; 751 VP8EncPredLuma4 = Intra4Preds; 752 VP8EncPredLuma16 = Intra16Preds; 753 VP8EncPredChroma8 = IntraChromaPreds; 754 VP8SSE16x16 = SSE16x16; 755 VP8SSE8x8 = SSE8x8; 756 VP8SSE16x8 = SSE16x8; 757 VP8SSE4x4 = SSE4x4; 758 VP8TDisto4x4 = Disto4x4; 759 VP8TDisto16x16 = Disto16x16; 760 VP8EncQuantizeBlock = QuantizeBlock; 761 VP8EncQuantize2Blocks = Quantize2Blocks; 762 
VP8EncQuantizeBlockWHT = QuantizeBlockWHT; 763 VP8Copy4x4 = Copy4x4; 764 VP8Copy16x8 = Copy16x8; 765 766 // If defined, use CPUInfo() to overwrite some pointers with faster versions. 767 if (VP8GetCPUInfo != NULL) { 768 #if defined(WEBP_USE_SSE2) 769 if (VP8GetCPUInfo(kSSE2)) { 770 VP8EncDspInitSSE2(); 771 #if defined(WEBP_USE_SSE41) 772 if (VP8GetCPUInfo(kSSE4_1)) { 773 VP8EncDspInitSSE41(); 774 } 775 #endif 776 } 777 #endif 778 #if defined(WEBP_USE_AVX2) 779 if (VP8GetCPUInfo(kAVX2)) { 780 VP8EncDspInitAVX2(); 781 } 782 #endif 783 #if defined(WEBP_USE_NEON) 784 if (VP8GetCPUInfo(kNEON)) { 785 VP8EncDspInitNEON(); 786 } 787 #endif 788 #if defined(WEBP_USE_MIPS32) 789 if (VP8GetCPUInfo(kMIPS32)) { 790 VP8EncDspInitMIPS32(); 791 } 792 #endif 793 #if defined(WEBP_USE_MIPS_DSP_R2) 794 if (VP8GetCPUInfo(kMIPSdspR2)) { 795 VP8EncDspInitMIPSdspR2(); 796 } 797 #endif 798 } 799 enc_last_cpuinfo_used = VP8GetCPUInfo; 800 } 801