1 // Copyright 2011 Google Inc. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the COPYING file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 // ----------------------------------------------------------------------------- 9 // 10 // Speed-critical encoding functions. 11 // 12 // Author: Skal (pascal.massimino (at) gmail.com) 13 14 #include <assert.h> 15 #include <stdlib.h> // for abs() 16 17 #include "src/dsp/dsp.h" 18 #include "src/enc/vp8i_enc.h" 19 20 static WEBP_INLINE uint8_t clip_8b(int v) { 21 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; 22 } 23 24 #if !WEBP_NEON_OMIT_C_CODE 25 static WEBP_INLINE int clip_max(int v, int max) { 26 return (v > max) ? max : v; 27 } 28 #endif // !WEBP_NEON_OMIT_C_CODE 29 30 //------------------------------------------------------------------------------ 31 // Compute susceptibility based on DCT-coeff histograms: 32 // the higher, the "easier" the macroblock is to compress. 33 34 const int VP8DspScan[16 + 4 + 4] = { 35 // Luma 36 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, 37 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, 38 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, 39 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, 40 41 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U 42 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V 43 }; 44 45 // general-purpose util function 46 void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1], 47 VP8Histogram* const histo) { 48 int max_value = 0, last_non_zero = 1; 49 int k; 50 for (k = 0; k <= MAX_COEFF_THRESH; ++k) { 51 const int value = distribution[k]; 52 if (value > 0) { 53 if (value > max_value) max_value = value; 54 last_non_zero = k; 55 } 56 } 57 histo->max_value = max_value; 58 histo->last_non_zero = last_non_zero; 59 } 60 61 #if !WEBP_NEON_OMIT_C_CODE 62 static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred, 63 int start_block, int end_block, 64 VP8Histogram* const histo) { 65 int j; 66 int distribution[MAX_COEFF_THRESH + 1] = { 0 }; 67 for (j = start_block; j < end_block; ++j) { 68 int k; 69 int16_t out[16]; 70 71 VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out); 72 73 // Convert coefficients to bin. 74 for (k = 0; k < 16; ++k) { 75 const int v = abs(out[k]) >> 3; 76 const int clipped_value = clip_max(v, MAX_COEFF_THRESH); 77 ++distribution[clipped_value]; 78 } 79 } 80 VP8SetHistogramData(distribution, histo); 81 } 82 #endif // !WEBP_NEON_OMIT_C_CODE 83 84 //------------------------------------------------------------------------------ 85 // run-time tables (~4k) 86 87 static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255] 88 89 // We declare this variable 'volatile' to prevent instruction reordering 90 // and make sure it's set to true _last_ (so as to be thread-safe) 91 static volatile int tables_ok = 0; 92 93 static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) { 94 if (!tables_ok) { 95 int i; 96 for (i = -255; i <= 255 + 255; ++i) { 97 clip1[255 + i] = clip_8b(i); 98 } 99 tables_ok = 1; 100 } 101 } 102 103 104 //------------------------------------------------------------------------------ 105 // Transforms (Paragraph 14.4) 106 107 #if !WEBP_NEON_OMIT_C_CODE 108 109 #define STORE(x, y, v) \ 110 dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3)) 111 112 static const int kC1 = 20091 + (1 << 16); 113 static const int kC2 = 35468; 114 #define MUL(a, b) (((a) * (b)) >> 16) 115 116 static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in, 117 uint8_t* dst) { 118 int C[4 * 4], *tmp; 119 int i; 120 tmp = C; 121 for (i = 0; i < 4; ++i) { // vertical pass 122 const int a = in[0] + in[8]; 123 const int b = in[0] - in[8]; 124 const int c = MUL(in[4], kC2) - MUL(in[12], kC1); 125 const int d = MUL(in[4], kC1) + MUL(in[12], kC2); 126 tmp[0] = a + d; 127 tmp[1] = b + c; 128 tmp[2] = b - c; 129 tmp[3] = a - d; 130 tmp += 4; 131 in++; 132 } 133 134 tmp = C; 135 for (i = 0; i < 4; ++i) { // horizontal pass 136 const int dc = tmp[0] + 4; 137 const int a = dc + tmp[8]; 138 const int b = dc - tmp[8]; 139 const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1); 140 const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2); 141 STORE(0, i, a + d); 142 STORE(1, i, b + c); 143 STORE(2, i, b - c); 144 STORE(3, i, a - d); 145 tmp++; 146 } 147 } 148 149 static void ITransform_C(const uint8_t* ref, const int16_t* in, uint8_t* dst, 150 int do_two) { 151 ITransformOne(ref, in, dst); 152 if (do_two) { 153 ITransformOne(ref + 4, in + 16, dst + 4); 154 } 155 } 156 157 static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) { 158 int i; 159 int tmp[16]; 160 for (i = 0; i < 4; ++i, src += BPS, ref += BPS) { 161 const int d0 = src[0] - ref[0]; // 9bit dynamic range ([-255,255]) 162 const int d1 = src[1] - ref[1]; 163 const int d2 = src[2] - ref[2]; 164 const int d3 = src[3] - ref[3]; 165 const int a0 = (d0 + d3); // 10b [-510,510] 166 const int a1 = (d1 + d2); 167 const int a2 = (d1 - d2); 168 const int a3 = (d0 - d3); 169 tmp[0 + i * 4] = (a0 + a1) * 8; // 14b [-8160,8160] 170 tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9; // [-7536,7542] 171 tmp[2 + i * 4] = (a0 - a1) * 8; 172 tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 937) >> 9; 173 } 174 for (i = 0; i < 4; ++i) { 175 const int a0 = (tmp[0 + i] + tmp[12 + i]); // 15b 176 const int a1 = (tmp[4 + i] + tmp[ 8 + i]); 177 const int a2 = (tmp[4 + i] - tmp[ 8 + i]); 178 const int a3 = (tmp[0 + i] - tmp[12 + i]); 179 out[0 + i] = (a0 + a1 + 7) >> 4; // 12b 180 out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0); 181 out[8 + i] = (a0 - a1 + 7) >> 4; 182 out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16); 183 } 184 } 185 #endif // !WEBP_NEON_OMIT_C_CODE 186 187 static void FTransform2_C(const uint8_t* src, const uint8_t* ref, 188 int16_t* out) { 189 VP8FTransform(src, ref, out); 190 VP8FTransform(src + 4, ref + 4, out + 16); 191 } 192 193 #if !WEBP_NEON_OMIT_C_CODE 194 static void FTransformWHT_C(const int16_t* in, int16_t* out) { 195 // input is 12b signed 196 int32_t tmp[16]; 197 int i; 198 for (i = 0; i < 4; ++i, in += 64) { 199 const int a0 = (in[0 * 16] + in[2 * 16]); // 13b 200 const int a1 = (in[1 * 16] + in[3 * 16]); 201 const int a2 = (in[1 * 16] - in[3 * 16]); 202 const int a3 = (in[0 * 16] - in[2 * 16]); 203 tmp[0 + i * 4] = a0 + a1; // 14b 204 tmp[1 + i * 4] = a3 + a2; 205 tmp[2 + i * 4] = a3 - a2; 206 tmp[3 + i * 4] = a0 - a1; 207 } 208 for (i = 0; i < 4; ++i) { 209 const int a0 = (tmp[0 + i] + tmp[8 + i]); // 15b 210 const int a1 = (tmp[4 + i] + tmp[12+ i]); 211 const int a2 = (tmp[4 + i] - tmp[12+ i]); 212 const int a3 = (tmp[0 + i] - tmp[8 + i]); 213 const int b0 = a0 + a1; // 16b 214 const int b1 = a3 + a2; 215 const int b2 = a3 - a2; 216 const int b3 = a0 - a1; 217 out[ 0 + i] = b0 >> 1; // 15b 218 out[ 4 + i] = b1 >> 1; 219 out[ 8 + i] = b2 >> 1; 220 out[12 + i] = b3 >> 1; 221 } 222 } 223 #endif // !WEBP_NEON_OMIT_C_CODE 224 225 #undef MUL 226 #undef STORE 227 228 //------------------------------------------------------------------------------ 229 // Intra predictions 230 231 static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) { 232 int j; 233 for (j = 0; j < size; ++j) { 234 memset(dst + j * BPS, value, size); 235 } 236 } 237 238 static WEBP_INLINE void VerticalPred(uint8_t* dst, 239 const uint8_t* top, int size) { 240 int j; 241 if (top != NULL) { 242 for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size); 243 } else { 244 Fill(dst, 127, size); 245 } 246 } 247 248 static WEBP_INLINE void HorizontalPred(uint8_t* dst, 249 const uint8_t* left, int size) { 250 if (left != NULL) { 251 int j; 252 for (j = 0; j < size; ++j) { 253 memset(dst + j * BPS, left[j], size); 254 } 255 } else { 256 Fill(dst, 129, size); 257 } 258 } 259 260 static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left, 261 const uint8_t* top, int size) { 262 int y; 263 if (left != NULL) { 264 if (top != NULL) { 265 const uint8_t* const clip = clip1 + 255 - left[-1]; 266 for (y = 0; y < size; ++y) { 267 const uint8_t* const clip_table = clip + left[y]; 268 int x; 269 for (x = 0; x < size; ++x) { 270 dst[x] = clip_table[top[x]]; 271 } 272 dst += BPS; 273 } 274 } else { 275 HorizontalPred(dst, left, size); 276 } 277 } else { 278 // true motion without left samples (hence: with default 129 value) 279 // is equivalent to VE prediction where you just copy the top samples. 280 // Note that if top samples are not available, the default value is 281 // then 129, and not 127 as in the VerticalPred case. 282 if (top != NULL) { 283 VerticalPred(dst, top, size); 284 } else { 285 Fill(dst, 129, size); 286 } 287 } 288 } 289 290 static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left, 291 const uint8_t* top, 292 int size, int round, int shift) { 293 int DC = 0; 294 int j; 295 if (top != NULL) { 296 for (j = 0; j < size; ++j) DC += top[j]; 297 if (left != NULL) { // top and left present 298 for (j = 0; j < size; ++j) DC += left[j]; 299 } else { // top, but no left 300 DC += DC; 301 } 302 DC = (DC + round) >> shift; 303 } else if (left != NULL) { // left but no top 304 for (j = 0; j < size; ++j) DC += left[j]; 305 DC += DC; 306 DC = (DC + round) >> shift; 307 } else { // no top, no left, nothing. 308 DC = 0x80; 309 } 310 Fill(dst, DC, size); 311 } 312 313 //------------------------------------------------------------------------------ 314 // Chroma 8x8 prediction (paragraph 12.2) 315 316 static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left, 317 const uint8_t* top) { 318 // U block 319 DCMode(C8DC8 + dst, left, top, 8, 8, 4); 320 VerticalPred(C8VE8 + dst, top, 8); 321 HorizontalPred(C8HE8 + dst, left, 8); 322 TrueMotion(C8TM8 + dst, left, top, 8); 323 // V block 324 dst += 8; 325 if (top != NULL) top += 8; 326 if (left != NULL) left += 16; 327 DCMode(C8DC8 + dst, left, top, 8, 8, 4); 328 VerticalPred(C8VE8 + dst, top, 8); 329 HorizontalPred(C8HE8 + dst, left, 8); 330 TrueMotion(C8TM8 + dst, left, top, 8); 331 } 332 333 //------------------------------------------------------------------------------ 334 // luma 16x16 prediction (paragraph 12.3) 335 336 static void Intra16Preds_C(uint8_t* dst, 337 const uint8_t* left, const uint8_t* top) { 338 DCMode(I16DC16 + dst, left, top, 16, 16, 5); 339 VerticalPred(I16VE16 + dst, top, 16); 340 HorizontalPred(I16HE16 + dst, left, 16); 341 TrueMotion(I16TM16 + dst, left, top, 16); 342 } 343 344 //------------------------------------------------------------------------------ 345 // luma 4x4 prediction 346 347 #define DST(x, y) dst[(x) + (y) * BPS] 348 #define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2)) 349 #define AVG2(a, b) (((a) + (b) + 1) >> 1) 350 351 static void VE4(uint8_t* dst, const uint8_t* top) { // vertical 352 const uint8_t vals[4] = { 353 AVG3(top[-1], top[0], top[1]), 354 AVG3(top[ 0], top[1], top[2]), 355 AVG3(top[ 1], top[2], top[3]), 356 AVG3(top[ 2], top[3], top[4]) 357 }; 358 int i; 359 for (i = 0; i < 4; ++i) { 360 memcpy(dst + i * BPS, vals, 4); 361 } 362 } 363 364 static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal 365 const int X = top[-1]; 366 const int I = top[-2]; 367 const int J = top[-3]; 368 const int K = top[-4]; 369 const int L = top[-5]; 370 WebPUint32ToMem(dst + 0 * BPS, 0x01010101U * AVG3(X, I, J)); 371 WebPUint32ToMem(dst + 1 * BPS, 0x01010101U * AVG3(I, J, K)); 372 WebPUint32ToMem(dst + 2 * BPS, 0x01010101U * AVG3(J, K, L)); 373 WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L)); 374 } 375 376 static void DC4(uint8_t* dst, const uint8_t* top) { 377 uint32_t dc = 4; 378 int i; 379 for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i]; 380 Fill(dst, dc >> 3, 4); 381 } 382 383 static void RD4(uint8_t* dst, const uint8_t* top) { 384 const int X = top[-1]; 385 const int I = top[-2]; 386 const int J = top[-3]; 387 const int K = top[-4]; 388 const int L = top[-5]; 389 const int A = top[0]; 390 const int B = top[1]; 391 const int C = top[2]; 392 const int D = top[3]; 393 DST(0, 3) = AVG3(J, K, L); 394 DST(0, 2) = DST(1, 3) = AVG3(I, J, K); 395 DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X, I, J); 396 DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I); 397 DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B, A, X); 398 DST(2, 0) = DST(3, 1) = AVG3(C, B, A); 399 DST(3, 0) = AVG3(D, C, B); 400 } 401 402 static void LD4(uint8_t* dst, const uint8_t* top) { 403 const int A = top[0]; 404 const int B = top[1]; 405 const int C = top[2]; 406 const int D = top[3]; 407 const int E = top[4]; 408 const int F = top[5]; 409 const int G = top[6]; 410 const int H = top[7]; 411 DST(0, 0) = AVG3(A, B, C); 412 DST(1, 0) = DST(0, 1) = AVG3(B, C, D); 413 DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); 414 DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); 415 DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); 416 DST(3, 2) = DST(2, 3) = AVG3(F, G, H); 417 DST(3, 3) = AVG3(G, H, H); 418 } 419 420 static void VR4(uint8_t* dst, const uint8_t* top) { 421 const int X = top[-1]; 422 const int I = top[-2]; 423 const int J = top[-3]; 424 const int K = top[-4]; 425 const int A = top[0]; 426 const int B = top[1]; 427 const int C = top[2]; 428 const int D = top[3]; 429 DST(0, 0) = DST(1, 2) = AVG2(X, A); 430 DST(1, 0) = DST(2, 2) = AVG2(A, B); 431 DST(2, 0) = DST(3, 2) = AVG2(B, C); 432 DST(3, 0) = AVG2(C, D); 433 434 DST(0, 3) = AVG3(K, J, I); 435 DST(0, 2) = AVG3(J, I, X); 436 DST(0, 1) = DST(1, 3) = AVG3(I, X, A); 437 DST(1, 1) = DST(2, 3) = AVG3(X, A, B); 438 DST(2, 1) = DST(3, 3) = AVG3(A, B, C); 439 DST(3, 1) = AVG3(B, C, D); 440 } 441 442 static void VL4(uint8_t* dst, const uint8_t* top) { 443 const int A = top[0]; 444 const int B = top[1]; 445 const int C = top[2]; 446 const int D = top[3]; 447 const int E = top[4]; 448 const int F = top[5]; 449 const int G = top[6]; 450 const int H = top[7]; 451 DST(0, 0) = AVG2(A, B); 452 DST(1, 0) = DST(0, 2) = AVG2(B, C); 453 DST(2, 0) = DST(1, 2) = AVG2(C, D); 454 DST(3, 0) = DST(2, 2) = AVG2(D, E); 455 456 DST(0, 1) = AVG3(A, B, C); 457 DST(1, 1) = DST(0, 3) = AVG3(B, C, D); 458 DST(2, 1) = DST(1, 3) = AVG3(C, D, E); 459 DST(3, 1) = DST(2, 3) = AVG3(D, E, F); 460 DST(3, 2) = AVG3(E, F, G); 461 DST(3, 3) = AVG3(F, G, H); 462 } 463 464 static void HU4(uint8_t* dst, const uint8_t* top) { 465 const int I = top[-2]; 466 const int J = top[-3]; 467 const int K = top[-4]; 468 const int L = top[-5]; 469 DST(0, 0) = AVG2(I, J); 470 DST(2, 0) = DST(0, 1) = AVG2(J, K); 471 DST(2, 1) = DST(0, 2) = AVG2(K, L); 472 DST(1, 0) = AVG3(I, J, K); 473 DST(3, 0) = DST(1, 1) = AVG3(J, K, L); 474 DST(3, 1) = DST(1, 2) = AVG3(K, L, L); 475 DST(3, 2) = DST(2, 2) = 476 DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; 477 } 478 479 static void HD4(uint8_t* dst, const uint8_t* top) { 480 const int X = top[-1]; 481 const int I = top[-2]; 482 const int J = top[-3]; 483 const int K = top[-4]; 484 const int L = top[-5]; 485 const int A = top[0]; 486 const int B = top[1]; 487 const int C = top[2]; 488 489 DST(0, 0) = DST(2, 1) = AVG2(I, X); 490 DST(0, 1) = DST(2, 2) = AVG2(J, I); 491 DST(0, 2) = DST(2, 3) = AVG2(K, J); 492 DST(0, 3) = AVG2(L, K); 493 494 DST(3, 0) = AVG3(A, B, C); 495 DST(2, 0) = AVG3(X, A, B); 496 DST(1, 0) = DST(3, 1) = AVG3(I, X, A); 497 DST(1, 1) = DST(3, 2) = AVG3(J, I, X); 498 DST(1, 2) = DST(3, 3) = AVG3(K, J, I); 499 DST(1, 3) = AVG3(L, K, J); 500 } 501 502 static void TM4(uint8_t* dst, const uint8_t* top) { 503 int x, y; 504 const uint8_t* const clip = clip1 + 255 - top[-1]; 505 for (y = 0; y < 4; ++y) { 506 const uint8_t* const clip_table = clip + top[-2 - y]; 507 for (x = 0; x < 4; ++x) { 508 dst[x] = clip_table[top[x]]; 509 } 510 dst += BPS; 511 } 512 } 513 514 #undef DST 515 #undef AVG3 516 #undef AVG2 517 518 // Left samples are top[-5 .. -2], top_left is top[-1], top are 519 // located at top[0..3], and top right is top[4..7] 520 static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) { 521 DC4(I4DC4 + dst, top); 522 TM4(I4TM4 + dst, top); 523 VE4(I4VE4 + dst, top); 524 HE4(I4HE4 + dst, top); 525 RD4(I4RD4 + dst, top); 526 VR4(I4VR4 + dst, top); 527 LD4(I4LD4 + dst, top); 528 VL4(I4VL4 + dst, top); 529 HD4(I4HD4 + dst, top); 530 HU4(I4HU4 + dst, top); 531 } 532 533 //------------------------------------------------------------------------------ 534 // Metric 535 536 #if !WEBP_NEON_OMIT_C_CODE 537 static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b, 538 int w, int h) { 539 int count = 0; 540 int y, x; 541 for (y = 0; y < h; ++y) { 542 for (x = 0; x < w; ++x) { 543 const int diff = (int)a[x] - b[x]; 544 count += diff * diff; 545 } 546 a += BPS; 547 b += BPS; 548 } 549 return count; 550 } 551 552 static int SSE16x16_C(const uint8_t* a, const uint8_t* b) { 553 return GetSSE(a, b, 16, 16); 554 } 555 static int SSE16x8_C(const uint8_t* a, const uint8_t* b) { 556 return GetSSE(a, b, 16, 8); 557 } 558 static int SSE8x8_C(const uint8_t* a, const uint8_t* b) { 559 return GetSSE(a, b, 8, 8); 560 } 561 static int SSE4x4_C(const uint8_t* a, const uint8_t* b) { 562 return GetSSE(a, b, 4, 4); 563 } 564 #endif // !WEBP_NEON_OMIT_C_CODE 565 566 static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) { 567 int k, x, y; 568 for (k = 0; k < 4; ++k) { 569 uint32_t avg = 0; 570 for (y = 0; y < 4; ++y) { 571 for (x = 0; x < 4; ++x) { 572 avg += ref[x + y * BPS]; 573 } 574 } 575 dc[k] = avg; 576 ref += 4; // go to next 4x4 block. 577 } 578 } 579 580 //------------------------------------------------------------------------------ 581 // Texture distortion 582 // 583 // We try to match the spectral content (weighted) between source and 584 // reconstructed samples. 585 586 #if !WEBP_NEON_OMIT_C_CODE 587 // Hadamard transform 588 // Returns the weighted sum of the absolute value of transformed coefficients. 589 // w[] contains a row-major 4 by 4 symmetric matrix. 590 static int TTransform(const uint8_t* in, const uint16_t* w) { 591 int sum = 0; 592 int tmp[16]; 593 int i; 594 // horizontal pass 595 for (i = 0; i < 4; ++i, in += BPS) { 596 const int a0 = in[0] + in[2]; 597 const int a1 = in[1] + in[3]; 598 const int a2 = in[1] - in[3]; 599 const int a3 = in[0] - in[2]; 600 tmp[0 + i * 4] = a0 + a1; 601 tmp[1 + i * 4] = a3 + a2; 602 tmp[2 + i * 4] = a3 - a2; 603 tmp[3 + i * 4] = a0 - a1; 604 } 605 // vertical pass 606 for (i = 0; i < 4; ++i, ++w) { 607 const int a0 = tmp[0 + i] + tmp[8 + i]; 608 const int a1 = tmp[4 + i] + tmp[12+ i]; 609 const int a2 = tmp[4 + i] - tmp[12+ i]; 610 const int a3 = tmp[0 + i] - tmp[8 + i]; 611 const int b0 = a0 + a1; 612 const int b1 = a3 + a2; 613 const int b2 = a3 - a2; 614 const int b3 = a0 - a1; 615 616 sum += w[ 0] * abs(b0); 617 sum += w[ 4] * abs(b1); 618 sum += w[ 8] * abs(b2); 619 sum += w[12] * abs(b3); 620 } 621 return sum; 622 } 623 624 static int Disto4x4_C(const uint8_t* const a, const uint8_t* const b, 625 const uint16_t* const w) { 626 const int sum1 = TTransform(a, w); 627 const int sum2 = TTransform(b, w); 628 return abs(sum2 - sum1) >> 5; 629 } 630 631 static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b, 632 const uint16_t* const w) { 633 int D = 0; 634 int x, y; 635 for (y = 0; y < 16 * BPS; y += 4 * BPS) { 636 for (x = 0; x < 16; x += 4) { 637 D += Disto4x4_C(a + x + y, b + x + y, w); 638 } 639 } 640 return D; 641 } 642 #endif // !WEBP_NEON_OMIT_C_CODE 643 644 //------------------------------------------------------------------------------ 645 // Quantization 646 // 647 648 static const uint8_t kZigzag[16] = { 649 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 650 }; 651 652 // Simple quantization 653 static int QuantizeBlock_C(int16_t in[16], int16_t out[16], 654 const VP8Matrix* const mtx) { 655 int last = -1; 656 int n; 657 for (n = 0; n < 16; ++n) { 658 const int j = kZigzag[n]; 659 const int sign = (in[j] < 0); 660 const uint32_t coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j]; 661 if (coeff > mtx->zthresh_[j]) { 662 const uint32_t Q = mtx->q_[j]; 663 const uint32_t iQ = mtx->iq_[j]; 664 const uint32_t B = mtx->bias_[j]; 665 int level = QUANTDIV(coeff, iQ, B); 666 if (level > MAX_LEVEL) level = MAX_LEVEL; 667 if (sign) level = -level; 668 in[j] = level * (int)Q; 669 out[n] = level; 670 if (level) last = n; 671 } else { 672 out[n] = 0; 673 in[j] = 0; 674 } 675 } 676 return (last >= 0); 677 } 678 679 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 680 static int Quantize2Blocks_C(int16_t in[32], int16_t out[32], 681 const VP8Matrix* const mtx) { 682 int nz; 683 nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0; 684 nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1; 685 return nz; 686 } 687 #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 688 689 //------------------------------------------------------------------------------ 690 // Block copy 691 692 static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) { 693 int y; 694 for (y = 0; y < h; ++y) { 695 memcpy(dst, src, w); 696 src += BPS; 697 dst += BPS; 698 } 699 } 700 701 static void Copy4x4_C(const uint8_t* src, uint8_t* dst) { 702 Copy(src, dst, 4, 4); 703 } 704 705 static void Copy16x8_C(const uint8_t* src, uint8_t* dst) { 706 Copy(src, dst, 16, 8); 707 } 708 709 //------------------------------------------------------------------------------ 710 // Initialization 711 712 // Speed-critical function pointers. We have to initialize them to the default 713 // implementations within VP8EncDspInit(). 714 VP8CHisto VP8CollectHistogram; 715 VP8Idct VP8ITransform; 716 VP8Fdct VP8FTransform; 717 VP8Fdct VP8FTransform2; 718 VP8WHT VP8FTransformWHT; 719 VP8Intra4Preds VP8EncPredLuma4; 720 VP8IntraPreds VP8EncPredLuma16; 721 VP8IntraPreds VP8EncPredChroma8; 722 VP8Metric VP8SSE16x16; 723 VP8Metric VP8SSE8x8; 724 VP8Metric VP8SSE16x8; 725 VP8Metric VP8SSE4x4; 726 VP8WMetric VP8TDisto4x4; 727 VP8WMetric VP8TDisto16x16; 728 VP8MeanMetric VP8Mean16x4; 729 VP8QuantizeBlock VP8EncQuantizeBlock; 730 VP8Quantize2Blocks VP8EncQuantize2Blocks; 731 VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT; 732 VP8BlockCopy VP8Copy4x4; 733 VP8BlockCopy VP8Copy16x8; 734 735 extern void VP8EncDspInitSSE2(void); 736 extern void VP8EncDspInitSSE41(void); 737 extern void VP8EncDspInitNEON(void); 738 extern void VP8EncDspInitMIPS32(void); 739 extern void VP8EncDspInitMIPSdspR2(void); 740 extern void VP8EncDspInitMSA(void); 741 742 WEBP_DSP_INIT_FUNC(VP8EncDspInit) { 743 VP8DspInit(); // common inverse transforms 744 InitTables(); 745 746 // default C implementations 747 #if !WEBP_NEON_OMIT_C_CODE 748 VP8ITransform = ITransform_C; 749 VP8FTransform = FTransform_C; 750 VP8FTransformWHT = FTransformWHT_C; 751 VP8TDisto4x4 = Disto4x4_C; 752 VP8TDisto16x16 = Disto16x16_C; 753 VP8CollectHistogram = CollectHistogram_C; 754 VP8SSE16x16 = SSE16x16_C; 755 VP8SSE16x8 = SSE16x8_C; 756 VP8SSE8x8 = SSE8x8_C; 757 VP8SSE4x4 = SSE4x4_C; 758 #endif 759 760 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 761 VP8EncQuantizeBlock = QuantizeBlock_C; 762 VP8EncQuantize2Blocks = Quantize2Blocks_C; 763 #endif 764 765 VP8FTransform2 = FTransform2_C; 766 VP8EncPredLuma4 = Intra4Preds_C; 767 VP8EncPredLuma16 = Intra16Preds_C; 768 VP8EncPredChroma8 = IntraChromaPreds_C; 769 VP8Mean16x4 = Mean16x4_C; 770 VP8EncQuantizeBlockWHT = QuantizeBlock_C; 771 VP8Copy4x4 = Copy4x4_C; 772 VP8Copy16x8 = Copy16x8_C; 773 774 // If defined, use CPUInfo() to overwrite some pointers with faster versions. 775 if (VP8GetCPUInfo != NULL) { 776 #if defined(WEBP_USE_SSE2) 777 if (VP8GetCPUInfo(kSSE2)) { 778 VP8EncDspInitSSE2(); 779 #if defined(WEBP_USE_SSE41) 780 if (VP8GetCPUInfo(kSSE4_1)) { 781 VP8EncDspInitSSE41(); 782 } 783 #endif 784 } 785 #endif 786 #if defined(WEBP_USE_MIPS32) 787 if (VP8GetCPUInfo(kMIPS32)) { 788 VP8EncDspInitMIPS32(); 789 } 790 #endif 791 #if defined(WEBP_USE_MIPS_DSP_R2) 792 if (VP8GetCPUInfo(kMIPSdspR2)) { 793 VP8EncDspInitMIPSdspR2(); 794 } 795 #endif 796 #if defined(WEBP_USE_MSA) 797 if (VP8GetCPUInfo(kMSA)) { 798 VP8EncDspInitMSA(); 799 } 800 #endif 801 } 802 803 #if defined(WEBP_USE_NEON) 804 if (WEBP_NEON_OMIT_C_CODE || 805 (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { 806 VP8EncDspInitNEON(); 807 } 808 #endif 809 810 assert(VP8ITransform != NULL); 811 assert(VP8FTransform != NULL); 812 assert(VP8FTransformWHT != NULL); 813 assert(VP8TDisto4x4 != NULL); 814 assert(VP8TDisto16x16 != NULL); 815 assert(VP8CollectHistogram != NULL); 816 assert(VP8SSE16x16 != NULL); 817 assert(VP8SSE16x8 != NULL); 818 assert(VP8SSE8x8 != NULL); 819 assert(VP8SSE4x4 != NULL); 820 assert(VP8EncQuantizeBlock != NULL); 821 assert(VP8EncQuantize2Blocks != NULL); 822 assert(VP8FTransform2 != NULL); 823 assert(VP8EncPredLuma4 != NULL); 824 assert(VP8EncPredLuma16 != NULL); 825 assert(VP8EncPredChroma8 != NULL); 826 assert(VP8Mean16x4 != NULL); 827 assert(VP8EncQuantizeBlockWHT != NULL); 828 assert(VP8Copy4x4 != NULL); 829 assert(VP8Copy16x8 != NULL); 830 } 831