1 // Copyright 2011 Google Inc. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the COPYING file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 // ----------------------------------------------------------------------------- 9 // 10 // Speed-critical encoding functions. 11 // 12 // Author: Skal (pascal.massimino (at) gmail.com) 13 14 #include <stdlib.h> // for abs() 15 #include "./dsp.h" 16 #include "../enc/vp8enci.h" 17 18 #if defined(__cplusplus) || defined(c_plusplus) 19 extern "C" { 20 #endif 21 22 static WEBP_INLINE uint8_t clip_8b(int v) { 23 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; 24 } 25 26 static WEBP_INLINE int clip_max(int v, int max) { 27 return (v > max) ? max : v; 28 } 29 30 //------------------------------------------------------------------------------ 31 // Compute susceptibility based on DCT-coeff histograms: 32 // the higher, the "easier" the macroblock is to compress. 33 34 const int VP8DspScan[16 + 4 + 4] = { 35 // Luma 36 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, 37 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, 38 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, 39 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, 40 41 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U 42 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V 43 }; 44 45 static void CollectHistogram(const uint8_t* ref, const uint8_t* pred, 46 int start_block, int end_block, 47 VP8Histogram* const histo) { 48 int j; 49 for (j = start_block; j < end_block; ++j) { 50 int k; 51 int16_t out[16]; 52 53 VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out); 54 55 // Convert coefficients to bin. 56 for (k = 0; k < 16; ++k) { 57 const int v = abs(out[k]) >> 3; // TODO(skal): add rounding? 58 const int clipped_value = clip_max(v, MAX_COEFF_THRESH); 59 histo->distribution[clipped_value]++; 60 } 61 } 62 } 63 64 //------------------------------------------------------------------------------ 65 // run-time tables (~4k) 66 67 static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255] 68 69 // We declare this variable 'volatile' to prevent instruction reordering 70 // and make sure it's set to true _last_ (so as to be thread-safe) 71 static volatile int tables_ok = 0; 72 73 static void InitTables(void) { 74 if (!tables_ok) { 75 int i; 76 for (i = -255; i <= 255 + 255; ++i) { 77 clip1[255 + i] = clip_8b(i); 78 } 79 tables_ok = 1; 80 } 81 } 82 83 84 //------------------------------------------------------------------------------ 85 // Transforms (Paragraph 14.4) 86 87 #define STORE(x, y, v) \ 88 dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3)) 89 90 static const int kC1 = 20091 + (1 << 16); 91 static const int kC2 = 35468; 92 #define MUL(a, b) (((a) * (b)) >> 16) 93 94 static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in, 95 uint8_t* dst) { 96 int C[4 * 4], *tmp; 97 int i; 98 tmp = C; 99 for (i = 0; i < 4; ++i) { // vertical pass 100 const int a = in[0] + in[8]; 101 const int b = in[0] - in[8]; 102 const int c = MUL(in[4], kC2) - MUL(in[12], kC1); 103 const int d = MUL(in[4], kC1) + MUL(in[12], kC2); 104 tmp[0] = a + d; 105 tmp[1] = b + c; 106 tmp[2] = b - c; 107 tmp[3] = a - d; 108 tmp += 4; 109 in++; 110 } 111 112 tmp = C; 113 for (i = 0; i < 4; ++i) { // horizontal pass 114 const int dc = tmp[0] + 4; 115 const int a = dc + tmp[8]; 116 const int b = dc - tmp[8]; 117 const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1); 118 const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2); 119 STORE(0, i, a + d); 120 STORE(1, i, b + c); 121 STORE(2, i, b - c); 122 STORE(3, i, a - d); 123 tmp++; 124 } 125 } 126 127 static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst, 128 int do_two) { 129 ITransformOne(ref, in, dst); 130 if (do_two) { 131 ITransformOne(ref + 4, in + 16, dst + 4); 132 } 133 } 134 135 static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) { 136 int i; 137 int tmp[16]; 138 for (i = 0; i < 4; ++i, src += BPS, ref += BPS) { 139 const int d0 = src[0] - ref[0]; // 9bit dynamic range ([-255,255]) 140 const int d1 = src[1] - ref[1]; 141 const int d2 = src[2] - ref[2]; 142 const int d3 = src[3] - ref[3]; 143 const int a0 = (d0 + d3); // 10b [-510,510] 144 const int a1 = (d1 + d2); 145 const int a2 = (d1 - d2); 146 const int a3 = (d0 - d3); 147 tmp[0 + i * 4] = (a0 + a1) * 8; // 14b [-8160,8160] 148 tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9; // [-7536,7542] 149 tmp[2 + i * 4] = (a0 - a1) * 8; 150 tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 937) >> 9; 151 } 152 for (i = 0; i < 4; ++i) { 153 const int a0 = (tmp[0 + i] + tmp[12 + i]); // 15b 154 const int a1 = (tmp[4 + i] + tmp[ 8 + i]); 155 const int a2 = (tmp[4 + i] - tmp[ 8 + i]); 156 const int a3 = (tmp[0 + i] - tmp[12 + i]); 157 out[0 + i] = (a0 + a1 + 7) >> 4; // 12b 158 out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0); 159 out[8 + i] = (a0 - a1 + 7) >> 4; 160 out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16); 161 } 162 } 163 164 static void ITransformWHT(const int16_t* in, int16_t* out) { 165 int tmp[16]; 166 int i; 167 for (i = 0; i < 4; ++i) { 168 const int a0 = in[0 + i] + in[12 + i]; 169 const int a1 = in[4 + i] + in[ 8 + i]; 170 const int a2 = in[4 + i] - in[ 8 + i]; 171 const int a3 = in[0 + i] - in[12 + i]; 172 tmp[0 + i] = a0 + a1; 173 tmp[8 + i] = a0 - a1; 174 tmp[4 + i] = a3 + a2; 175 tmp[12 + i] = a3 - a2; 176 } 177 for (i = 0; i < 4; ++i) { 178 const int dc = tmp[0 + i * 4] + 3; // w/ rounder 179 const int a0 = dc + tmp[3 + i * 4]; 180 const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4]; 181 const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4]; 182 const int a3 = dc - tmp[3 + i * 4]; 183 out[ 0] = (a0 + a1) >> 3; 184 out[16] = (a3 + a2) >> 3; 185 out[32] = (a0 - a1) >> 3; 186 out[48] = (a3 - a2) >> 3; 187 out += 64; 188 } 189 } 190 191 static void FTransformWHT(const int16_t* in, int16_t* out) { 192 // input is 12b signed 193 int16_t tmp[16]; 194 int i; 195 for (i = 0; i < 4; ++i, in += 64) { 196 const int a0 = (in[0 * 16] + in[2 * 16]); // 13b 197 const int a1 = (in[1 * 16] + in[3 * 16]); 198 const int a2 = (in[1 * 16] - in[3 * 16]); 199 const int a3 = (in[0 * 16] - in[2 * 16]); 200 tmp[0 + i * 4] = a0 + a1; // 14b 201 tmp[1 + i * 4] = a3 + a2; 202 tmp[2 + i * 4] = a3 - a2; 203 tmp[3 + i * 4] = a0 - a1; 204 } 205 for (i = 0; i < 4; ++i) { 206 const int a0 = (tmp[0 + i] + tmp[8 + i]); // 15b 207 const int a1 = (tmp[4 + i] + tmp[12+ i]); 208 const int a2 = (tmp[4 + i] - tmp[12+ i]); 209 const int a3 = (tmp[0 + i] - tmp[8 + i]); 210 const int b0 = a0 + a1; // 16b 211 const int b1 = a3 + a2; 212 const int b2 = a3 - a2; 213 const int b3 = a0 - a1; 214 out[ 0 + i] = b0 >> 1; // 15b 215 out[ 4 + i] = b1 >> 1; 216 out[ 8 + i] = b2 >> 1; 217 out[12 + i] = b3 >> 1; 218 } 219 } 220 221 #undef MUL 222 #undef STORE 223 224 //------------------------------------------------------------------------------ 225 // Intra predictions 226 227 #define DST(x, y) dst[(x) + (y) * BPS] 228 229 static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) { 230 int j; 231 for (j = 0; j < size; ++j) { 232 memset(dst + j * BPS, value, size); 233 } 234 } 235 236 static WEBP_INLINE void VerticalPred(uint8_t* dst, 237 const uint8_t* top, int size) { 238 int j; 239 if (top) { 240 for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size); 241 } else { 242 Fill(dst, 127, size); 243 } 244 } 245 246 static WEBP_INLINE void HorizontalPred(uint8_t* dst, 247 const uint8_t* left, int size) { 248 if (left) { 249 int j; 250 for (j = 0; j < size; ++j) { 251 memset(dst + j * BPS, left[j], size); 252 } 253 } else { 254 Fill(dst, 129, size); 255 } 256 } 257 258 static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left, 259 const uint8_t* top, int size) { 260 int y; 261 if (left) { 262 if (top) { 263 const uint8_t* const clip = clip1 + 255 - left[-1]; 264 for (y = 0; y < size; ++y) { 265 const uint8_t* const clip_table = clip + left[y]; 266 int x; 267 for (x = 0; x < size; ++x) { 268 dst[x] = clip_table[top[x]]; 269 } 270 dst += BPS; 271 } 272 } else { 273 HorizontalPred(dst, left, size); 274 } 275 } else { 276 // true motion without left samples (hence: with default 129 value) 277 // is equivalent to VE prediction where you just copy the top samples. 278 // Note that if top samples are not available, the default value is 279 // then 129, and not 127 as in the VerticalPred case. 280 if (top) { 281 VerticalPred(dst, top, size); 282 } else { 283 Fill(dst, 129, size); 284 } 285 } 286 } 287 288 static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left, 289 const uint8_t* top, 290 int size, int round, int shift) { 291 int DC = 0; 292 int j; 293 if (top) { 294 for (j = 0; j < size; ++j) DC += top[j]; 295 if (left) { // top and left present 296 for (j = 0; j < size; ++j) DC += left[j]; 297 } else { // top, but no left 298 DC += DC; 299 } 300 DC = (DC + round) >> shift; 301 } else if (left) { // left but no top 302 for (j = 0; j < size; ++j) DC += left[j]; 303 DC += DC; 304 DC = (DC + round) >> shift; 305 } else { // no top, no left, nothing. 306 DC = 0x80; 307 } 308 Fill(dst, DC, size); 309 } 310 311 //------------------------------------------------------------------------------ 312 // Chroma 8x8 prediction (paragraph 12.2) 313 314 static void IntraChromaPreds(uint8_t* dst, const uint8_t* left, 315 const uint8_t* top) { 316 // U block 317 DCMode(C8DC8 + dst, left, top, 8, 8, 4); 318 VerticalPred(C8VE8 + dst, top, 8); 319 HorizontalPred(C8HE8 + dst, left, 8); 320 TrueMotion(C8TM8 + dst, left, top, 8); 321 // V block 322 dst += 8; 323 if (top) top += 8; 324 if (left) left += 16; 325 DCMode(C8DC8 + dst, left, top, 8, 8, 4); 326 VerticalPred(C8VE8 + dst, top, 8); 327 HorizontalPred(C8HE8 + dst, left, 8); 328 TrueMotion(C8TM8 + dst, left, top, 8); 329 } 330 331 //------------------------------------------------------------------------------ 332 // luma 16x16 prediction (paragraph 12.3) 333 334 static void Intra16Preds(uint8_t* dst, 335 const uint8_t* left, const uint8_t* top) { 336 DCMode(I16DC16 + dst, left, top, 16, 16, 5); 337 VerticalPred(I16VE16 + dst, top, 16); 338 HorizontalPred(I16HE16 + dst, left, 16); 339 TrueMotion(I16TM16 + dst, left, top, 16); 340 } 341 342 //------------------------------------------------------------------------------ 343 // luma 4x4 prediction 344 345 #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) 346 #define AVG2(a, b) (((a) + (b) + 1) >> 1) 347 348 static void VE4(uint8_t* dst, const uint8_t* top) { // vertical 349 const uint8_t vals[4] = { 350 AVG3(top[-1], top[0], top[1]), 351 AVG3(top[ 0], top[1], top[2]), 352 AVG3(top[ 1], top[2], top[3]), 353 AVG3(top[ 2], top[3], top[4]) 354 }; 355 int i; 356 for (i = 0; i < 4; ++i) { 357 memcpy(dst + i * BPS, vals, 4); 358 } 359 } 360 361 static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal 362 const int X = top[-1]; 363 const int I = top[-2]; 364 const int J = top[-3]; 365 const int K = top[-4]; 366 const int L = top[-5]; 367 *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(X, I, J); 368 *(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(I, J, K); 369 *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(J, K, L); 370 *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(K, L, L); 371 } 372 373 static void DC4(uint8_t* dst, const uint8_t* top) { 374 uint32_t dc = 4; 375 int i; 376 for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i]; 377 Fill(dst, dc >> 3, 4); 378 } 379 380 static void RD4(uint8_t* dst, const uint8_t* top) { 381 const int X = top[-1]; 382 const int I = top[-2]; 383 const int J = top[-3]; 384 const int K = top[-4]; 385 const int L = top[-5]; 386 const int A = top[0]; 387 const int B = top[1]; 388 const int C = top[2]; 389 const int D = top[3]; 390 DST(0, 3) = AVG3(J, K, L); 391 DST(0, 2) = DST(1, 3) = AVG3(I, J, K); 392 DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X, I, J); 393 DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I); 394 DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B, A, X); 395 DST(2, 0) = DST(3, 1) = AVG3(C, B, A); 396 DST(3, 0) = AVG3(D, C, B); 397 } 398 399 static void LD4(uint8_t* dst, const uint8_t* top) { 400 const int A = top[0]; 401 const int B = top[1]; 402 const int C = top[2]; 403 const int D = top[3]; 404 const int E = top[4]; 405 const int F = top[5]; 406 const int G = top[6]; 407 const int H = top[7]; 408 DST(0, 0) = AVG3(A, B, C); 409 DST(1, 0) = DST(0, 1) = AVG3(B, C, D); 410 DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); 411 DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); 412 DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); 413 DST(3, 2) = DST(2, 3) = AVG3(F, G, H); 414 DST(3, 3) = AVG3(G, H, H); 415 } 416 417 static void VR4(uint8_t* dst, const uint8_t* top) { 418 const int X = top[-1]; 419 const int I = top[-2]; 420 const int J = top[-3]; 421 const int K = top[-4]; 422 const int A = top[0]; 423 const int B = top[1]; 424 const int C = top[2]; 425 const int D = top[3]; 426 DST(0, 0) = DST(1, 2) = AVG2(X, A); 427 DST(1, 0) = DST(2, 2) = AVG2(A, B); 428 DST(2, 0) = DST(3, 2) = AVG2(B, C); 429 DST(3, 0) = AVG2(C, D); 430 431 DST(0, 3) = AVG3(K, J, I); 432 DST(0, 2) = AVG3(J, I, X); 433 DST(0, 1) = DST(1, 3) = AVG3(I, X, A); 434 DST(1, 1) = DST(2, 3) = AVG3(X, A, B); 435 DST(2, 1) = DST(3, 3) = AVG3(A, B, C); 436 DST(3, 1) = AVG3(B, C, D); 437 } 438 439 static void VL4(uint8_t* dst, const uint8_t* top) { 440 const int A = top[0]; 441 const int B = top[1]; 442 const int C = top[2]; 443 const int D = top[3]; 444 const int E = top[4]; 445 const int F = top[5]; 446 const int G = top[6]; 447 const int H = top[7]; 448 DST(0, 0) = AVG2(A, B); 449 DST(1, 0) = DST(0, 2) = AVG2(B, C); 450 DST(2, 0) = DST(1, 2) = AVG2(C, D); 451 DST(3, 0) = DST(2, 2) = AVG2(D, E); 452 453 DST(0, 1) = AVG3(A, B, C); 454 DST(1, 1) = DST(0, 3) = AVG3(B, C, D); 455 DST(2, 1) = DST(1, 3) = AVG3(C, D, E); 456 DST(3, 1) = DST(2, 3) = AVG3(D, E, F); 457 DST(3, 2) = AVG3(E, F, G); 458 DST(3, 3) = AVG3(F, G, H); 459 } 460 461 static void HU4(uint8_t* dst, const uint8_t* top) { 462 const int I = top[-2]; 463 const int J = top[-3]; 464 const int K = top[-4]; 465 const int L = top[-5]; 466 DST(0, 0) = AVG2(I, J); 467 DST(2, 0) = DST(0, 1) = AVG2(J, K); 468 DST(2, 1) = DST(0, 2) = AVG2(K, L); 469 DST(1, 0) = AVG3(I, J, K); 470 DST(3, 0) = DST(1, 1) = AVG3(J, K, L); 471 DST(3, 1) = DST(1, 2) = AVG3(K, L, L); 472 DST(3, 2) = DST(2, 2) = 473 DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; 474 } 475 476 static void HD4(uint8_t* dst, const uint8_t* top) { 477 const int X = top[-1]; 478 const int I = top[-2]; 479 const int J = top[-3]; 480 const int K = top[-4]; 481 const int L = top[-5]; 482 const int A = top[0]; 483 const int B = top[1]; 484 const int C = top[2]; 485 486 DST(0, 0) = DST(2, 1) = AVG2(I, X); 487 DST(0, 1) = DST(2, 2) = AVG2(J, I); 488 DST(0, 2) = DST(2, 3) = AVG2(K, J); 489 DST(0, 3) = AVG2(L, K); 490 491 DST(3, 0) = AVG3(A, B, C); 492 DST(2, 0) = AVG3(X, A, B); 493 DST(1, 0) = DST(3, 1) = AVG3(I, X, A); 494 DST(1, 1) = DST(3, 2) = AVG3(J, I, X); 495 DST(1, 2) = DST(3, 3) = AVG3(K, J, I); 496 DST(1, 3) = AVG3(L, K, J); 497 } 498 499 static void TM4(uint8_t* dst, const uint8_t* top) { 500 int x, y; 501 const uint8_t* const clip = clip1 + 255 - top[-1]; 502 for (y = 0; y < 4; ++y) { 503 const uint8_t* const clip_table = clip + top[-2 - y]; 504 for (x = 0; x < 4; ++x) { 505 dst[x] = clip_table[top[x]]; 506 } 507 dst += BPS; 508 } 509 } 510 511 #undef DST 512 #undef AVG3 513 #undef AVG2 514 515 // Left samples are top[-5 .. -2], top_left is top[-1], top are 516 // located at top[0..3], and top right is top[4..7] 517 static void Intra4Preds(uint8_t* dst, const uint8_t* top) { 518 DC4(I4DC4 + dst, top); 519 TM4(I4TM4 + dst, top); 520 VE4(I4VE4 + dst, top); 521 HE4(I4HE4 + dst, top); 522 RD4(I4RD4 + dst, top); 523 VR4(I4VR4 + dst, top); 524 LD4(I4LD4 + dst, top); 525 VL4(I4VL4 + dst, top); 526 HD4(I4HD4 + dst, top); 527 HU4(I4HU4 + dst, top); 528 } 529 530 //------------------------------------------------------------------------------ 531 // Metric 532 533 static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b, 534 int w, int h) { 535 int count = 0; 536 int y, x; 537 for (y = 0; y < h; ++y) { 538 for (x = 0; x < w; ++x) { 539 const int diff = (int)a[x] - b[x]; 540 count += diff * diff; 541 } 542 a += BPS; 543 b += BPS; 544 } 545 return count; 546 } 547 548 static int SSE16x16(const uint8_t* a, const uint8_t* b) { 549 return GetSSE(a, b, 16, 16); 550 } 551 static int SSE16x8(const uint8_t* a, const uint8_t* b) { 552 return GetSSE(a, b, 16, 8); 553 } 554 static int SSE8x8(const uint8_t* a, const uint8_t* b) { 555 return GetSSE(a, b, 8, 8); 556 } 557 static int SSE4x4(const uint8_t* a, const uint8_t* b) { 558 return GetSSE(a, b, 4, 4); 559 } 560 561 //------------------------------------------------------------------------------ 562 // Texture distortion 563 // 564 // We try to match the spectral content (weighted) between source and 565 // reconstructed samples. 566 567 // Hadamard transform 568 // Returns the weighted sum of the absolute value of transformed coefficients. 569 static int TTransform(const uint8_t* in, const uint16_t* w) { 570 int sum = 0; 571 int tmp[16]; 572 int i; 573 // horizontal pass 574 for (i = 0; i < 4; ++i, in += BPS) { 575 const int a0 = in[0] + in[2]; 576 const int a1 = in[1] + in[3]; 577 const int a2 = in[1] - in[3]; 578 const int a3 = in[0] - in[2]; 579 tmp[0 + i * 4] = a0 + a1; 580 tmp[1 + i * 4] = a3 + a2; 581 tmp[2 + i * 4] = a3 - a2; 582 tmp[3 + i * 4] = a0 - a1; 583 } 584 // vertical pass 585 for (i = 0; i < 4; ++i, ++w) { 586 const int a0 = tmp[0 + i] + tmp[8 + i]; 587 const int a1 = tmp[4 + i] + tmp[12+ i]; 588 const int a2 = tmp[4 + i] - tmp[12+ i]; 589 const int a3 = tmp[0 + i] - tmp[8 + i]; 590 const int b0 = a0 + a1; 591 const int b1 = a3 + a2; 592 const int b2 = a3 - a2; 593 const int b3 = a0 - a1; 594 595 sum += w[ 0] * abs(b0); 596 sum += w[ 4] * abs(b1); 597 sum += w[ 8] * abs(b2); 598 sum += w[12] * abs(b3); 599 } 600 return sum; 601 } 602 603 static int Disto4x4(const uint8_t* const a, const uint8_t* const b, 604 const uint16_t* const w) { 605 const int sum1 = TTransform(a, w); 606 const int sum2 = TTransform(b, w); 607 return abs(sum2 - sum1) >> 5; 608 } 609 610 static int Disto16x16(const uint8_t* const a, const uint8_t* const b, 611 const uint16_t* const w) { 612 int D = 0; 613 int x, y; 614 for (y = 0; y < 16 * BPS; y += 4 * BPS) { 615 for (x = 0; x < 16; x += 4) { 616 D += Disto4x4(a + x + y, b + x + y, w); 617 } 618 } 619 return D; 620 } 621 622 //------------------------------------------------------------------------------ 623 // Quantization 624 // 625 626 static const uint8_t kZigzag[16] = { 627 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 628 }; 629 630 // Simple quantization 631 static int QuantizeBlock(int16_t in[16], int16_t out[16], 632 int n, const VP8Matrix* const mtx) { 633 int last = -1; 634 for (; n < 16; ++n) { 635 const int j = kZigzag[n]; 636 const int sign = (in[j] < 0); 637 const int coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j]; 638 if (coeff > mtx->zthresh_[j]) { 639 const int Q = mtx->q_[j]; 640 const int iQ = mtx->iq_[j]; 641 const int B = mtx->bias_[j]; 642 out[n] = QUANTDIV(coeff, iQ, B); 643 if (out[n] > MAX_LEVEL) out[n] = MAX_LEVEL; 644 if (sign) out[n] = -out[n]; 645 in[j] = out[n] * Q; 646 if (out[n]) last = n; 647 } else { 648 out[n] = 0; 649 in[j] = 0; 650 } 651 } 652 return (last >= 0); 653 } 654 655 //------------------------------------------------------------------------------ 656 // Block copy 657 658 static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int size) { 659 int y; 660 for (y = 0; y < size; ++y) { 661 memcpy(dst, src, size); 662 src += BPS; 663 dst += BPS; 664 } 665 } 666 667 static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); } 668 669 //------------------------------------------------------------------------------ 670 // Initialization 671 672 // Speed-critical function pointers. We have to initialize them to the default 673 // implementations within VP8EncDspInit(). 674 VP8CHisto VP8CollectHistogram; 675 VP8Idct VP8ITransform; 676 VP8Fdct VP8FTransform; 677 VP8WHT VP8ITransformWHT; 678 VP8WHT VP8FTransformWHT; 679 VP8Intra4Preds VP8EncPredLuma4; 680 VP8IntraPreds VP8EncPredLuma16; 681 VP8IntraPreds VP8EncPredChroma8; 682 VP8Metric VP8SSE16x16; 683 VP8Metric VP8SSE8x8; 684 VP8Metric VP8SSE16x8; 685 VP8Metric VP8SSE4x4; 686 VP8WMetric VP8TDisto4x4; 687 VP8WMetric VP8TDisto16x16; 688 VP8QuantizeBlock VP8EncQuantizeBlock; 689 VP8BlockCopy VP8Copy4x4; 690 691 extern void VP8EncDspInitSSE2(void); 692 extern void VP8EncDspInitNEON(void); 693 694 void VP8EncDspInit(void) { 695 InitTables(); 696 697 // default C implementations 698 VP8CollectHistogram = CollectHistogram; 699 VP8ITransform = ITransform; 700 VP8FTransform = FTransform; 701 VP8ITransformWHT = ITransformWHT; 702 VP8FTransformWHT = FTransformWHT; 703 VP8EncPredLuma4 = Intra4Preds; 704 VP8EncPredLuma16 = Intra16Preds; 705 VP8EncPredChroma8 = IntraChromaPreds; 706 VP8SSE16x16 = SSE16x16; 707 VP8SSE8x8 = SSE8x8; 708 VP8SSE16x8 = SSE16x8; 709 VP8SSE4x4 = SSE4x4; 710 VP8TDisto4x4 = Disto4x4; 711 VP8TDisto16x16 = Disto16x16; 712 VP8EncQuantizeBlock = QuantizeBlock; 713 VP8Copy4x4 = Copy4x4; 714 715 // If defined, use CPUInfo() to overwrite some pointers with faster versions. 716 if (VP8GetCPUInfo) { 717 #if defined(WEBP_USE_SSE2) 718 if (VP8GetCPUInfo(kSSE2)) { 719 VP8EncDspInitSSE2(); 720 } 721 #elif defined(WEBP_USE_NEON) 722 if (VP8GetCPUInfo(kNEON)) { 723 VP8EncDspInitNEON(); 724 } 725 #endif 726 } 727 } 728 729 #if defined(__cplusplus) || defined(c_plusplus) 730 } // extern "C" 731 #endif 732