1 // Copyright 2011 Google Inc. All Rights Reserved. 2 // 3 // This code is licensed under the same terms as WebM: 4 // Software License Agreement: http://www.webmproject.org/license/software/ 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ 6 // ----------------------------------------------------------------------------- 7 // 8 // Speed-critical encoding functions. 9 // 10 // Author: Skal (pascal.massimino (at) gmail.com) 11 12 #include <stdlib.h> // for abs() 13 #include "./dsp.h" 14 #include "../enc/vp8enci.h" 15 16 #if defined(__cplusplus) || defined(c_plusplus) 17 extern "C" { 18 #endif 19 20 static WEBP_INLINE uint8_t clip_8b(int v) { 21 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; 22 } 23 24 static WEBP_INLINE int clip_max(int v, int max) { 25 return (v > max) ? max : v; 26 } 27 28 //------------------------------------------------------------------------------ 29 // Compute susceptibility based on DCT-coeff histograms: 30 // the higher, the "easier" the macroblock is to compress. 31 32 const int VP8DspScan[16 + 4 + 4] = { 33 // Luma 34 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, 35 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, 36 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, 37 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, 38 39 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U 40 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V 41 }; 42 43 static void CollectHistogram(const uint8_t* ref, const uint8_t* pred, 44 int start_block, int end_block, 45 VP8Histogram* const histo) { 46 int j; 47 for (j = start_block; j < end_block; ++j) { 48 int k; 49 int16_t out[16]; 50 51 VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out); 52 53 // Convert coefficients to bin. 54 for (k = 0; k < 16; ++k) { 55 const int v = abs(out[k]) >> 3; // TODO(skal): add rounding? 56 const int clipped_value = clip_max(v, MAX_COEFF_THRESH); 57 histo->distribution[clipped_value]++; 58 } 59 } 60 } 61 62 //------------------------------------------------------------------------------ 63 // run-time tables (~4k) 64 65 static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255] 66 67 // We declare this variable 'volatile' to prevent instruction reordering 68 // and make sure it's set to true _last_ (so as to be thread-safe) 69 static volatile int tables_ok = 0; 70 71 static void InitTables(void) { 72 if (!tables_ok) { 73 int i; 74 for (i = -255; i <= 255 + 255; ++i) { 75 clip1[255 + i] = clip_8b(i); 76 } 77 tables_ok = 1; 78 } 79 } 80 81 82 //------------------------------------------------------------------------------ 83 // Transforms (Paragraph 14.4) 84 85 #define STORE(x, y, v) \ 86 dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3)) 87 88 static const int kC1 = 20091 + (1 << 16); 89 static const int kC2 = 35468; 90 #define MUL(a, b) (((a) * (b)) >> 16) 91 92 static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in, 93 uint8_t* dst) { 94 int C[4 * 4], *tmp; 95 int i; 96 tmp = C; 97 for (i = 0; i < 4; ++i) { // vertical pass 98 const int a = in[0] + in[8]; 99 const int b = in[0] - in[8]; 100 const int c = MUL(in[4], kC2) - MUL(in[12], kC1); 101 const int d = MUL(in[4], kC1) + MUL(in[12], kC2); 102 tmp[0] = a + d; 103 tmp[1] = b + c; 104 tmp[2] = b - c; 105 tmp[3] = a - d; 106 tmp += 4; 107 in++; 108 } 109 110 tmp = C; 111 for (i = 0; i < 4; ++i) { // horizontal pass 112 const int dc = tmp[0] + 4; 113 const int a = dc + tmp[8]; 114 const int b = dc - tmp[8]; 115 const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1); 116 const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2); 117 STORE(0, i, a + d); 118 STORE(1, i, b + c); 119 STORE(2, i, b - c); 120 STORE(3, i, a - d); 121 tmp++; 122 } 123 } 124 125 static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst, 126 int do_two) { 127 ITransformOne(ref, in, dst); 128 if (do_two) { 129 ITransformOne(ref + 4, in + 16, dst + 4); 130 } 131 } 132 133 static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) { 134 int i; 135 int tmp[16]; 136 for (i = 0; i < 4; ++i, src += BPS, ref += BPS) { 137 const int d0 = src[0] - ref[0]; // 9bit dynamic range ([-255,255]) 138 const int d1 = src[1] - ref[1]; 139 const int d2 = src[2] - ref[2]; 140 const int d3 = src[3] - ref[3]; 141 const int a0 = (d0 + d3); // 10b [-510,510] 142 const int a1 = (d1 + d2); 143 const int a2 = (d1 - d2); 144 const int a3 = (d0 - d3); 145 tmp[0 + i * 4] = (a0 + a1) << 3; // 14b [-8160,8160] 146 tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9; // [-7536,7542] 147 tmp[2 + i * 4] = (a0 - a1) << 3; 148 tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 937) >> 9; 149 } 150 for (i = 0; i < 4; ++i) { 151 const int a0 = (tmp[0 + i] + tmp[12 + i]); // 15b 152 const int a1 = (tmp[4 + i] + tmp[ 8 + i]); 153 const int a2 = (tmp[4 + i] - tmp[ 8 + i]); 154 const int a3 = (tmp[0 + i] - tmp[12 + i]); 155 out[0 + i] = (a0 + a1 + 7) >> 4; // 12b 156 out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0); 157 out[8 + i] = (a0 - a1 + 7) >> 4; 158 out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16); 159 } 160 } 161 162 static void ITransformWHT(const int16_t* in, int16_t* out) { 163 int tmp[16]; 164 int i; 165 for (i = 0; i < 4; ++i) { 166 const int a0 = in[0 + i] + in[12 + i]; 167 const int a1 = in[4 + i] + in[ 8 + i]; 168 const int a2 = in[4 + i] - in[ 8 + i]; 169 const int a3 = in[0 + i] - in[12 + i]; 170 tmp[0 + i] = a0 + a1; 171 tmp[8 + i] = a0 - a1; 172 tmp[4 + i] = a3 + a2; 173 tmp[12 + i] = a3 - a2; 174 } 175 for (i = 0; i < 4; ++i) { 176 const int dc = tmp[0 + i * 4] + 3; // w/ rounder 177 const int a0 = dc + tmp[3 + i * 4]; 178 const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4]; 179 const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4]; 180 const int a3 = dc - tmp[3 + i * 4]; 181 out[ 0] = (a0 + a1) >> 3; 182 out[16] = (a3 + a2) >> 3; 183 out[32] = (a0 - a1) >> 3; 184 out[48] = (a3 - a2) >> 3; 185 out += 64; 186 } 187 } 188 189 static void FTransformWHT(const int16_t* in, int16_t* out) { 190 int tmp[16]; 191 int i; 192 for (i = 0; i < 4; ++i, in += 64) { 193 const int a0 = (in[0 * 16] + in[2 * 16]) << 2; 194 const int a1 = (in[1 * 16] + in[3 * 16]) << 2; 195 const int a2 = (in[1 * 16] - in[3 * 16]) << 2; 196 const int a3 = (in[0 * 16] - in[2 * 16]) << 2; 197 tmp[0 + i * 4] = (a0 + a1) + (a0 != 0); 198 tmp[1 + i * 4] = a3 + a2; 199 tmp[2 + i * 4] = a3 - a2; 200 tmp[3 + i * 4] = a0 - a1; 201 } 202 for (i = 0; i < 4; ++i) { 203 const int a0 = (tmp[0 + i] + tmp[8 + i]); 204 const int a1 = (tmp[4 + i] + tmp[12+ i]); 205 const int a2 = (tmp[4 + i] - tmp[12+ i]); 206 const int a3 = (tmp[0 + i] - tmp[8 + i]); 207 const int b0 = a0 + a1; 208 const int b1 = a3 + a2; 209 const int b2 = a3 - a2; 210 const int b3 = a0 - a1; 211 out[ 0 + i] = (b0 + (b0 > 0) + 3) >> 3; 212 out[ 4 + i] = (b1 + (b1 > 0) + 3) >> 3; 213 out[ 8 + i] = (b2 + (b2 > 0) + 3) >> 3; 214 out[12 + i] = (b3 + (b3 > 0) + 3) >> 3; 215 } 216 } 217 218 #undef MUL 219 #undef STORE 220 221 //------------------------------------------------------------------------------ 222 // Intra predictions 223 224 #define DST(x, y) dst[(x) + (y) * BPS] 225 226 static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) { 227 int j; 228 for (j = 0; j < size; ++j) { 229 memset(dst + j * BPS, value, size); 230 } 231 } 232 233 static WEBP_INLINE void VerticalPred(uint8_t* dst, 234 const uint8_t* top, int size) { 235 int j; 236 if (top) { 237 for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size); 238 } else { 239 Fill(dst, 127, size); 240 } 241 } 242 243 static WEBP_INLINE void HorizontalPred(uint8_t* dst, 244 const uint8_t* left, int size) { 245 if (left) { 246 int j; 247 for (j = 0; j < size; ++j) { 248 memset(dst + j * BPS, left[j], size); 249 } 250 } else { 251 Fill(dst, 129, size); 252 } 253 } 254 255 static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left, 256 const uint8_t* top, int size) { 257 int y; 258 if (left) { 259 if (top) { 260 const uint8_t* const clip = clip1 + 255 - left[-1]; 261 for (y = 0; y < size; ++y) { 262 const uint8_t* const clip_table = clip + left[y]; 263 int x; 264 for (x = 0; x < size; ++x) { 265 dst[x] = clip_table[top[x]]; 266 } 267 dst += BPS; 268 } 269 } else { 270 HorizontalPred(dst, left, size); 271 } 272 } else { 273 // true motion without left samples (hence: with default 129 value) 274 // is equivalent to VE prediction where you just copy the top samples. 275 // Note that if top samples are not available, the default value is 276 // then 129, and not 127 as in the VerticalPred case. 277 if (top) { 278 VerticalPred(dst, top, size); 279 } else { 280 Fill(dst, 129, size); 281 } 282 } 283 } 284 285 static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left, 286 const uint8_t* top, 287 int size, int round, int shift) { 288 int DC = 0; 289 int j; 290 if (top) { 291 for (j = 0; j < size; ++j) DC += top[j]; 292 if (left) { // top and left present 293 for (j = 0; j < size; ++j) DC += left[j]; 294 } else { // top, but no left 295 DC += DC; 296 } 297 DC = (DC + round) >> shift; 298 } else if (left) { // left but no top 299 for (j = 0; j < size; ++j) DC += left[j]; 300 DC += DC; 301 DC = (DC + round) >> shift; 302 } else { // no top, no left, nothing. 303 DC = 0x80; 304 } 305 Fill(dst, DC, size); 306 } 307 308 //------------------------------------------------------------------------------ 309 // Chroma 8x8 prediction (paragraph 12.2) 310 311 static void IntraChromaPreds(uint8_t* dst, const uint8_t* left, 312 const uint8_t* top) { 313 // U block 314 DCMode(C8DC8 + dst, left, top, 8, 8, 4); 315 VerticalPred(C8VE8 + dst, top, 8); 316 HorizontalPred(C8HE8 + dst, left, 8); 317 TrueMotion(C8TM8 + dst, left, top, 8); 318 // V block 319 dst += 8; 320 if (top) top += 8; 321 if (left) left += 16; 322 DCMode(C8DC8 + dst, left, top, 8, 8, 4); 323 VerticalPred(C8VE8 + dst, top, 8); 324 HorizontalPred(C8HE8 + dst, left, 8); 325 TrueMotion(C8TM8 + dst, left, top, 8); 326 } 327 328 //------------------------------------------------------------------------------ 329 // luma 16x16 prediction (paragraph 12.3) 330 331 static void Intra16Preds(uint8_t* dst, 332 const uint8_t* left, const uint8_t* top) { 333 DCMode(I16DC16 + dst, left, top, 16, 16, 5); 334 VerticalPred(I16VE16 + dst, top, 16); 335 HorizontalPred(I16HE16 + dst, left, 16); 336 TrueMotion(I16TM16 + dst, left, top, 16); 337 } 338 339 //------------------------------------------------------------------------------ 340 // luma 4x4 prediction 341 342 #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) 343 #define AVG2(a, b) (((a) + (b) + 1) >> 1) 344 345 static void VE4(uint8_t* dst, const uint8_t* top) { // vertical 346 const uint8_t vals[4] = { 347 AVG3(top[-1], top[0], top[1]), 348 AVG3(top[ 0], top[1], top[2]), 349 AVG3(top[ 1], top[2], top[3]), 350 AVG3(top[ 2], top[3], top[4]) 351 }; 352 int i; 353 for (i = 0; i < 4; ++i) { 354 memcpy(dst + i * BPS, vals, 4); 355 } 356 } 357 358 static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal 359 const int X = top[-1]; 360 const int I = top[-2]; 361 const int J = top[-3]; 362 const int K = top[-4]; 363 const int L = top[-5]; 364 *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(X, I, J); 365 *(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(I, J, K); 366 *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(J, K, L); 367 *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(K, L, L); 368 } 369 370 static void DC4(uint8_t* dst, const uint8_t* top) { 371 uint32_t dc = 4; 372 int i; 373 for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i]; 374 Fill(dst, dc >> 3, 4); 375 } 376 377 static void RD4(uint8_t* dst, const uint8_t* top) { 378 const int X = top[-1]; 379 const int I = top[-2]; 380 const int J = top[-3]; 381 const int K = top[-4]; 382 const int L = top[-5]; 383 const int A = top[0]; 384 const int B = top[1]; 385 const int C = top[2]; 386 const int D = top[3]; 387 DST(0, 3) = AVG3(J, K, L); 388 DST(0, 2) = DST(1, 3) = AVG3(I, J, K); 389 DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X, I, J); 390 DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I); 391 DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B, A, X); 392 DST(2, 0) = DST(3, 1) = AVG3(C, B, A); 393 DST(3, 0) = AVG3(D, C, B); 394 } 395 396 static void LD4(uint8_t* dst, const uint8_t* top) { 397 const int A = top[0]; 398 const int B = top[1]; 399 const int C = top[2]; 400 const int D = top[3]; 401 const int E = top[4]; 402 const int F = top[5]; 403 const int G = top[6]; 404 const int H = top[7]; 405 DST(0, 0) = AVG3(A, B, C); 406 DST(1, 0) = DST(0, 1) = AVG3(B, C, D); 407 DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); 408 DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); 409 DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); 410 DST(3, 2) = DST(2, 3) = AVG3(F, G, H); 411 DST(3, 3) = AVG3(G, H, H); 412 } 413 414 static void VR4(uint8_t* dst, const uint8_t* top) { 415 const int X = top[-1]; 416 const int I = top[-2]; 417 const int J = top[-3]; 418 const int K = top[-4]; 419 const int A = top[0]; 420 const int B = top[1]; 421 const int C = top[2]; 422 const int D = top[3]; 423 DST(0, 0) = DST(1, 2) = AVG2(X, A); 424 DST(1, 0) = DST(2, 2) = AVG2(A, B); 425 DST(2, 0) = DST(3, 2) = AVG2(B, C); 426 DST(3, 0) = AVG2(C, D); 427 428 DST(0, 3) = AVG3(K, J, I); 429 DST(0, 2) = AVG3(J, I, X); 430 DST(0, 1) = DST(1, 3) = AVG3(I, X, A); 431 DST(1, 1) = DST(2, 3) = AVG3(X, A, B); 432 DST(2, 1) = DST(3, 3) = AVG3(A, B, C); 433 DST(3, 1) = AVG3(B, C, D); 434 } 435 436 static void VL4(uint8_t* dst, const uint8_t* top) { 437 const int A = top[0]; 438 const int B = top[1]; 439 const int C = top[2]; 440 const int D = top[3]; 441 const int E = top[4]; 442 const int F = top[5]; 443 const int G = top[6]; 444 const int H = top[7]; 445 DST(0, 0) = AVG2(A, B); 446 DST(1, 0) = DST(0, 2) = AVG2(B, C); 447 DST(2, 0) = DST(1, 2) = AVG2(C, D); 448 DST(3, 0) = DST(2, 2) = AVG2(D, E); 449 450 DST(0, 1) = AVG3(A, B, C); 451 DST(1, 1) = DST(0, 3) = AVG3(B, C, D); 452 DST(2, 1) = DST(1, 3) = AVG3(C, D, E); 453 DST(3, 1) = DST(2, 3) = AVG3(D, E, F); 454 DST(3, 2) = AVG3(E, F, G); 455 DST(3, 3) = AVG3(F, G, H); 456 } 457 458 static void HU4(uint8_t* dst, const uint8_t* top) { 459 const int I = top[-2]; 460 const int J = top[-3]; 461 const int K = top[-4]; 462 const int L = top[-5]; 463 DST(0, 0) = AVG2(I, J); 464 DST(2, 0) = DST(0, 1) = AVG2(J, K); 465 DST(2, 1) = DST(0, 2) = AVG2(K, L); 466 DST(1, 0) = AVG3(I, J, K); 467 DST(3, 0) = DST(1, 1) = AVG3(J, K, L); 468 DST(3, 1) = DST(1, 2) = AVG3(K, L, L); 469 DST(3, 2) = DST(2, 2) = 470 DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; 471 } 472 473 static void HD4(uint8_t* dst, const uint8_t* top) { 474 const int X = top[-1]; 475 const int I = top[-2]; 476 const int J = top[-3]; 477 const int K = top[-4]; 478 const int L = top[-5]; 479 const int A = top[0]; 480 const int B = top[1]; 481 const int C = top[2]; 482 483 DST(0, 0) = DST(2, 1) = AVG2(I, X); 484 DST(0, 1) = DST(2, 2) = AVG2(J, I); 485 DST(0, 2) = DST(2, 3) = AVG2(K, J); 486 DST(0, 3) = AVG2(L, K); 487 488 DST(3, 0) = AVG3(A, B, C); 489 DST(2, 0) = AVG3(X, A, B); 490 DST(1, 0) = DST(3, 1) = AVG3(I, X, A); 491 DST(1, 1) = DST(3, 2) = AVG3(J, I, X); 492 DST(1, 2) = DST(3, 3) = AVG3(K, J, I); 493 DST(1, 3) = AVG3(L, K, J); 494 } 495 496 static void TM4(uint8_t* dst, const uint8_t* top) { 497 int x, y; 498 const uint8_t* const clip = clip1 + 255 - top[-1]; 499 for (y = 0; y < 4; ++y) { 500 const uint8_t* const clip_table = clip + top[-2 - y]; 501 for (x = 0; x < 4; ++x) { 502 dst[x] = clip_table[top[x]]; 503 } 504 dst += BPS; 505 } 506 } 507 508 #undef DST 509 #undef AVG3 510 #undef AVG2 511 512 // Left samples are top[-5 .. -2], top_left is top[-1], top are 513 // located at top[0..3], and top right is top[4..7] 514 static void Intra4Preds(uint8_t* dst, const uint8_t* top) { 515 DC4(I4DC4 + dst, top); 516 TM4(I4TM4 + dst, top); 517 VE4(I4VE4 + dst, top); 518 HE4(I4HE4 + dst, top); 519 RD4(I4RD4 + dst, top); 520 VR4(I4VR4 + dst, top); 521 LD4(I4LD4 + dst, top); 522 VL4(I4VL4 + dst, top); 523 HD4(I4HD4 + dst, top); 524 HU4(I4HU4 + dst, top); 525 } 526 527 //------------------------------------------------------------------------------ 528 // Metric 529 530 static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b, 531 int w, int h) { 532 int count = 0; 533 int y, x; 534 for (y = 0; y < h; ++y) { 535 for (x = 0; x < w; ++x) { 536 const int diff = (int)a[x] - b[x]; 537 count += diff * diff; 538 } 539 a += BPS; 540 b += BPS; 541 } 542 return count; 543 } 544 545 static int SSE16x16(const uint8_t* a, const uint8_t* b) { 546 return GetSSE(a, b, 16, 16); 547 } 548 static int SSE16x8(const uint8_t* a, const uint8_t* b) { 549 return GetSSE(a, b, 16, 8); 550 } 551 static int SSE8x8(const uint8_t* a, const uint8_t* b) { 552 return GetSSE(a, b, 8, 8); 553 } 554 static int SSE4x4(const uint8_t* a, const uint8_t* b) { 555 return GetSSE(a, b, 4, 4); 556 } 557 558 //------------------------------------------------------------------------------ 559 // Texture distortion 560 // 561 // We try to match the spectral content (weighted) between source and 562 // reconstructed samples. 563 564 // Hadamard transform 565 // Returns the weighted sum of the absolute value of transformed coefficients. 566 static int TTransform(const uint8_t* in, const uint16_t* w) { 567 int sum = 0; 568 int tmp[16]; 569 int i; 570 // horizontal pass 571 for (i = 0; i < 4; ++i, in += BPS) { 572 const int a0 = in[0] + in[2]; 573 const int a1 = in[1] + in[3]; 574 const int a2 = in[1] - in[3]; 575 const int a3 = in[0] - in[2]; 576 tmp[0 + i * 4] = a0 + a1; 577 tmp[1 + i * 4] = a3 + a2; 578 tmp[2 + i * 4] = a3 - a2; 579 tmp[3 + i * 4] = a0 - a1; 580 } 581 // vertical pass 582 for (i = 0; i < 4; ++i, ++w) { 583 const int a0 = tmp[0 + i] + tmp[8 + i]; 584 const int a1 = tmp[4 + i] + tmp[12+ i]; 585 const int a2 = tmp[4 + i] - tmp[12+ i]; 586 const int a3 = tmp[0 + i] - tmp[8 + i]; 587 const int b0 = a0 + a1; 588 const int b1 = a3 + a2; 589 const int b2 = a3 - a2; 590 const int b3 = a0 - a1; 591 592 sum += w[ 0] * abs(b0); 593 sum += w[ 4] * abs(b1); 594 sum += w[ 8] * abs(b2); 595 sum += w[12] * abs(b3); 596 } 597 return sum; 598 } 599 600 static int Disto4x4(const uint8_t* const a, const uint8_t* const b, 601 const uint16_t* const w) { 602 const int sum1 = TTransform(a, w); 603 const int sum2 = TTransform(b, w); 604 return abs(sum2 - sum1) >> 5; 605 } 606 607 static int Disto16x16(const uint8_t* const a, const uint8_t* const b, 608 const uint16_t* const w) { 609 int D = 0; 610 int x, y; 611 for (y = 0; y < 16 * BPS; y += 4 * BPS) { 612 for (x = 0; x < 16; x += 4) { 613 D += Disto4x4(a + x + y, b + x + y, w); 614 } 615 } 616 return D; 617 } 618 619 //------------------------------------------------------------------------------ 620 // Quantization 621 // 622 623 static const uint8_t kZigzag[16] = { 624 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 625 }; 626 627 // Simple quantization 628 static int QuantizeBlock(int16_t in[16], int16_t out[16], 629 int n, const VP8Matrix* const mtx) { 630 int last = -1; 631 for (; n < 16; ++n) { 632 const int j = kZigzag[n]; 633 const int sign = (in[j] < 0); 634 int coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j]; 635 if (coeff > 2047) coeff = 2047; 636 if (coeff > mtx->zthresh_[j]) { 637 const int Q = mtx->q_[j]; 638 const int iQ = mtx->iq_[j]; 639 const int B = mtx->bias_[j]; 640 out[n] = QUANTDIV(coeff, iQ, B); 641 if (sign) out[n] = -out[n]; 642 in[j] = out[n] * Q; 643 if (out[n]) last = n; 644 } else { 645 out[n] = 0; 646 in[j] = 0; 647 } 648 } 649 return (last >= 0); 650 } 651 652 //------------------------------------------------------------------------------ 653 // Block copy 654 655 static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int size) { 656 int y; 657 for (y = 0; y < size; ++y) { 658 memcpy(dst, src, size); 659 src += BPS; 660 dst += BPS; 661 } 662 } 663 664 static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); } 665 666 //------------------------------------------------------------------------------ 667 // Initialization 668 669 // Speed-critical function pointers. We have to initialize them to the default 670 // implementations within VP8EncDspInit(). 671 VP8CHisto VP8CollectHistogram; 672 VP8Idct VP8ITransform; 673 VP8Fdct VP8FTransform; 674 VP8WHT VP8ITransformWHT; 675 VP8WHT VP8FTransformWHT; 676 VP8Intra4Preds VP8EncPredLuma4; 677 VP8IntraPreds VP8EncPredLuma16; 678 VP8IntraPreds VP8EncPredChroma8; 679 VP8Metric VP8SSE16x16; 680 VP8Metric VP8SSE8x8; 681 VP8Metric VP8SSE16x8; 682 VP8Metric VP8SSE4x4; 683 VP8WMetric VP8TDisto4x4; 684 VP8WMetric VP8TDisto16x16; 685 VP8QuantizeBlock VP8EncQuantizeBlock; 686 VP8BlockCopy VP8Copy4x4; 687 688 extern void VP8EncDspInitSSE2(void); 689 extern void VP8EncDspInitNEON(void); 690 691 void VP8EncDspInit(void) { 692 InitTables(); 693 694 // default C implementations 695 VP8CollectHistogram = CollectHistogram; 696 VP8ITransform = ITransform; 697 VP8FTransform = FTransform; 698 VP8ITransformWHT = ITransformWHT; 699 VP8FTransformWHT = FTransformWHT; 700 VP8EncPredLuma4 = Intra4Preds; 701 VP8EncPredLuma16 = Intra16Preds; 702 VP8EncPredChroma8 = IntraChromaPreds; 703 VP8SSE16x16 = SSE16x16; 704 VP8SSE8x8 = SSE8x8; 705 VP8SSE16x8 = SSE16x8; 706 VP8SSE4x4 = SSE4x4; 707 VP8TDisto4x4 = Disto4x4; 708 VP8TDisto16x16 = Disto16x16; 709 VP8EncQuantizeBlock = QuantizeBlock; 710 VP8Copy4x4 = Copy4x4; 711 712 // If defined, use CPUInfo() to overwrite some pointers with faster versions. 713 if (VP8GetCPUInfo) { 714 #if defined(WEBP_USE_SSE2) 715 if (VP8GetCPUInfo(kSSE2)) { 716 VP8EncDspInitSSE2(); 717 } 718 #elif defined(WEBP_USE_NEON) 719 if (VP8GetCPUInfo(kNEON)) { 720 VP8EncDspInitNEON(); 721 } 722 #endif 723 } 724 } 725 726 #if defined(__cplusplus) || defined(c_plusplus) 727 } // extern "C" 728 #endif 729