1 // Copyright 2010 Google Inc. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the COPYING file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 // ----------------------------------------------------------------------------- 9 // 10 // Speed-critical decoding functions, default plain-C implementations. 11 // 12 // Author: Skal (pascal.massimino (at) gmail.com) 13 14 #include <assert.h> 15 16 #include "src/dsp/dsp.h" 17 #include "src/dec/vp8i_dec.h" 18 #include "src/utils/utils.h" 19 20 //------------------------------------------------------------------------------ 21 22 static WEBP_INLINE uint8_t clip_8b(int v) { 23 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; 24 } 25 26 //------------------------------------------------------------------------------ 27 // Transforms (Paragraph 14.4) 28 29 #define STORE(x, y, v) \ 30 dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3)) 31 32 #define STORE2(y, dc, d, c) do { \ 33 const int DC = (dc); \ 34 STORE(0, y, DC + (d)); \ 35 STORE(1, y, DC + (c)); \ 36 STORE(2, y, DC - (c)); \ 37 STORE(3, y, DC - (d)); \ 38 } while (0) 39 40 #define MUL1(a) ((((a) * 20091) >> 16) + (a)) 41 #define MUL2(a) (((a) * 35468) >> 16) 42 43 #if !WEBP_NEON_OMIT_C_CODE 44 static void TransformOne_C(const int16_t* in, uint8_t* dst) { 45 int C[4 * 4], *tmp; 46 int i; 47 tmp = C; 48 for (i = 0; i < 4; ++i) { // vertical pass 49 const int a = in[0] + in[8]; // [-4096, 4094] 50 const int b = in[0] - in[8]; // [-4095, 4095] 51 const int c = MUL2(in[4]) - MUL1(in[12]); // [-3783, 3783] 52 const int d = MUL1(in[4]) + MUL2(in[12]); // [-3785, 3781] 53 tmp[0] = a + d; // [-7881, 7875] 54 tmp[1] = b + c; // [-7878, 7878] 55 tmp[2] = b - c; // [-7878, 7878] 56 tmp[3] = a - d; // [-7877, 7879] 57 tmp += 4; 58 in++; 59 } 60 // Each pass is expanding the dynamic range by ~3.85 (upper bound). 61 // The exact value is (2. + (20091 + 35468) / 65536). 62 // After the second pass, maximum interval is [-3794, 3794], assuming 63 // an input in [-2048, 2047] interval. We then need to add a dst value 64 // in the [0, 255] range. 65 // In the worst case scenario, the input to clip_8b() can be as large as 66 // [-60713, 60968]. 67 tmp = C; 68 for (i = 0; i < 4; ++i) { // horizontal pass 69 const int dc = tmp[0] + 4; 70 const int a = dc + tmp[8]; 71 const int b = dc - tmp[8]; 72 const int c = MUL2(tmp[4]) - MUL1(tmp[12]); 73 const int d = MUL1(tmp[4]) + MUL2(tmp[12]); 74 STORE(0, 0, a + d); 75 STORE(1, 0, b + c); 76 STORE(2, 0, b - c); 77 STORE(3, 0, a - d); 78 tmp++; 79 dst += BPS; 80 } 81 } 82 83 // Simplified transform when only in[0], in[1] and in[4] are non-zero 84 static void TransformAC3_C(const int16_t* in, uint8_t* dst) { 85 const int a = in[0] + 4; 86 const int c4 = MUL2(in[4]); 87 const int d4 = MUL1(in[4]); 88 const int c1 = MUL2(in[1]); 89 const int d1 = MUL1(in[1]); 90 STORE2(0, a + d4, d1, c1); 91 STORE2(1, a + c4, d1, c1); 92 STORE2(2, a - c4, d1, c1); 93 STORE2(3, a - d4, d1, c1); 94 } 95 #undef MUL1 96 #undef MUL2 97 #undef STORE2 98 99 static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) { 100 TransformOne_C(in, dst); 101 if (do_two) { 102 TransformOne_C(in + 16, dst + 4); 103 } 104 } 105 #endif // !WEBP_NEON_OMIT_C_CODE 106 107 static void TransformUV_C(const int16_t* in, uint8_t* dst) { 108 VP8Transform(in + 0 * 16, dst, 1); 109 VP8Transform(in + 2 * 16, dst + 4 * BPS, 1); 110 } 111 112 #if !WEBP_NEON_OMIT_C_CODE 113 static void TransformDC_C(const int16_t* in, uint8_t* dst) { 114 const int DC = in[0] + 4; 115 int i, j; 116 for (j = 0; j < 4; ++j) { 117 for (i = 0; i < 4; ++i) { 118 STORE(i, j, DC); 119 } 120 } 121 } 122 #endif // !WEBP_NEON_OMIT_C_CODE 123 124 static void TransformDCUV_C(const int16_t* in, uint8_t* dst) { 125 if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst); 126 if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4); 127 if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS); 128 if (in[3 * 16]) VP8TransformDC(in + 3 * 16, dst + 4 * BPS + 4); 129 } 130 131 #undef STORE 132 133 //------------------------------------------------------------------------------ 134 // Paragraph 14.3 135 136 #if !WEBP_NEON_OMIT_C_CODE 137 static void TransformWHT_C(const int16_t* in, int16_t* out) { 138 int tmp[16]; 139 int i; 140 for (i = 0; i < 4; ++i) { 141 const int a0 = in[0 + i] + in[12 + i]; 142 const int a1 = in[4 + i] + in[ 8 + i]; 143 const int a2 = in[4 + i] - in[ 8 + i]; 144 const int a3 = in[0 + i] - in[12 + i]; 145 tmp[0 + i] = a0 + a1; 146 tmp[8 + i] = a0 - a1; 147 tmp[4 + i] = a3 + a2; 148 tmp[12 + i] = a3 - a2; 149 } 150 for (i = 0; i < 4; ++i) { 151 const int dc = tmp[0 + i * 4] + 3; // w/ rounder 152 const int a0 = dc + tmp[3 + i * 4]; 153 const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4]; 154 const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4]; 155 const int a3 = dc - tmp[3 + i * 4]; 156 out[ 0] = (a0 + a1) >> 3; 157 out[16] = (a3 + a2) >> 3; 158 out[32] = (a0 - a1) >> 3; 159 out[48] = (a3 - a2) >> 3; 160 out += 64; 161 } 162 } 163 #endif // !WEBP_NEON_OMIT_C_CODE 164 165 void (*VP8TransformWHT)(const int16_t* in, int16_t* out); 166 167 //------------------------------------------------------------------------------ 168 // Intra predictions 169 170 #define DST(x, y) dst[(x) + (y) * BPS] 171 172 #if !WEBP_NEON_OMIT_C_CODE 173 static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) { 174 const uint8_t* top = dst - BPS; 175 const uint8_t* const clip0 = VP8kclip1 - top[-1]; 176 int y; 177 for (y = 0; y < size; ++y) { 178 const uint8_t* const clip = clip0 + dst[-1]; 179 int x; 180 for (x = 0; x < size; ++x) { 181 dst[x] = clip[top[x]]; 182 } 183 dst += BPS; 184 } 185 } 186 static void TM4_C(uint8_t* dst) { TrueMotion(dst, 4); } 187 static void TM8uv_C(uint8_t* dst) { TrueMotion(dst, 8); } 188 static void TM16_C(uint8_t* dst) { TrueMotion(dst, 16); } 189 190 //------------------------------------------------------------------------------ 191 // 16x16 192 193 static void VE16_C(uint8_t* dst) { // vertical 194 int j; 195 for (j = 0; j < 16; ++j) { 196 memcpy(dst + j * BPS, dst - BPS, 16); 197 } 198 } 199 200 static void HE16_C(uint8_t* dst) { // horizontal 201 int j; 202 for (j = 16; j > 0; --j) { 203 memset(dst, dst[-1], 16); 204 dst += BPS; 205 } 206 } 207 208 static WEBP_INLINE void Put16(int v, uint8_t* dst) { 209 int j; 210 for (j = 0; j < 16; ++j) { 211 memset(dst + j * BPS, v, 16); 212 } 213 } 214 215 static void DC16_C(uint8_t* dst) { // DC 216 int DC = 16; 217 int j; 218 for (j = 0; j < 16; ++j) { 219 DC += dst[-1 + j * BPS] + dst[j - BPS]; 220 } 221 Put16(DC >> 5, dst); 222 } 223 224 static void DC16NoTop_C(uint8_t* dst) { // DC with top samples not available 225 int DC = 8; 226 int j; 227 for (j = 0; j < 16; ++j) { 228 DC += dst[-1 + j * BPS]; 229 } 230 Put16(DC >> 4, dst); 231 } 232 233 static void DC16NoLeft_C(uint8_t* dst) { // DC with left samples not available 234 int DC = 8; 235 int i; 236 for (i = 0; i < 16; ++i) { 237 DC += dst[i - BPS]; 238 } 239 Put16(DC >> 4, dst); 240 } 241 242 static void DC16NoTopLeft_C(uint8_t* dst) { // DC with no top and left samples 243 Put16(0x80, dst); 244 } 245 #endif // !WEBP_NEON_OMIT_C_CODE 246 247 VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES]; 248 249 //------------------------------------------------------------------------------ 250 // 4x4 251 252 #define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2)) 253 #define AVG2(a, b) (((a) + (b) + 1) >> 1) 254 255 #if !WEBP_NEON_OMIT_C_CODE 256 static void VE4_C(uint8_t* dst) { // vertical 257 const uint8_t* top = dst - BPS; 258 const uint8_t vals[4] = { 259 AVG3(top[-1], top[0], top[1]), 260 AVG3(top[ 0], top[1], top[2]), 261 AVG3(top[ 1], top[2], top[3]), 262 AVG3(top[ 2], top[3], top[4]) 263 }; 264 int i; 265 for (i = 0; i < 4; ++i) { 266 memcpy(dst + i * BPS, vals, sizeof(vals)); 267 } 268 } 269 #endif // !WEBP_NEON_OMIT_C_CODE 270 271 static void HE4_C(uint8_t* dst) { // horizontal 272 const int A = dst[-1 - BPS]; 273 const int B = dst[-1]; 274 const int C = dst[-1 + BPS]; 275 const int D = dst[-1 + 2 * BPS]; 276 const int E = dst[-1 + 3 * BPS]; 277 WebPUint32ToMem(dst + 0 * BPS, 0x01010101U * AVG3(A, B, C)); 278 WebPUint32ToMem(dst + 1 * BPS, 0x01010101U * AVG3(B, C, D)); 279 WebPUint32ToMem(dst + 2 * BPS, 0x01010101U * AVG3(C, D, E)); 280 WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(D, E, E)); 281 } 282 283 #if !WEBP_NEON_OMIT_C_CODE 284 static void DC4_C(uint8_t* dst) { // DC 285 uint32_t dc = 4; 286 int i; 287 for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS]; 288 dc >>= 3; 289 for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4); 290 } 291 292 static void RD4_C(uint8_t* dst) { // Down-right 293 const int I = dst[-1 + 0 * BPS]; 294 const int J = dst[-1 + 1 * BPS]; 295 const int K = dst[-1 + 2 * BPS]; 296 const int L = dst[-1 + 3 * BPS]; 297 const int X = dst[-1 - BPS]; 298 const int A = dst[0 - BPS]; 299 const int B = dst[1 - BPS]; 300 const int C = dst[2 - BPS]; 301 const int D = dst[3 - BPS]; 302 DST(0, 3) = AVG3(J, K, L); 303 DST(1, 3) = DST(0, 2) = AVG3(I, J, K); 304 DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J); 305 DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I); 306 DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X); 307 DST(3, 1) = DST(2, 0) = AVG3(C, B, A); 308 DST(3, 0) = AVG3(D, C, B); 309 } 310 311 static void LD4_C(uint8_t* dst) { // Down-Left 312 const int A = dst[0 - BPS]; 313 const int B = dst[1 - BPS]; 314 const int C = dst[2 - BPS]; 315 const int D = dst[3 - BPS]; 316 const int E = dst[4 - BPS]; 317 const int F = dst[5 - BPS]; 318 const int G = dst[6 - BPS]; 319 const int H = dst[7 - BPS]; 320 DST(0, 0) = AVG3(A, B, C); 321 DST(1, 0) = DST(0, 1) = AVG3(B, C, D); 322 DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); 323 DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); 324 DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); 325 DST(3, 2) = DST(2, 3) = AVG3(F, G, H); 326 DST(3, 3) = AVG3(G, H, H); 327 } 328 #endif // !WEBP_NEON_OMIT_C_CODE 329 330 static void VR4_C(uint8_t* dst) { // Vertical-Right 331 const int I = dst[-1 + 0 * BPS]; 332 const int J = dst[-1 + 1 * BPS]; 333 const int K = dst[-1 + 2 * BPS]; 334 const int X = dst[-1 - BPS]; 335 const int A = dst[0 - BPS]; 336 const int B = dst[1 - BPS]; 337 const int C = dst[2 - BPS]; 338 const int D = dst[3 - BPS]; 339 DST(0, 0) = DST(1, 2) = AVG2(X, A); 340 DST(1, 0) = DST(2, 2) = AVG2(A, B); 341 DST(2, 0) = DST(3, 2) = AVG2(B, C); 342 DST(3, 0) = AVG2(C, D); 343 344 DST(0, 3) = AVG3(K, J, I); 345 DST(0, 2) = AVG3(J, I, X); 346 DST(0, 1) = DST(1, 3) = AVG3(I, X, A); 347 DST(1, 1) = DST(2, 3) = AVG3(X, A, B); 348 DST(2, 1) = DST(3, 3) = AVG3(A, B, C); 349 DST(3, 1) = AVG3(B, C, D); 350 } 351 352 static void VL4_C(uint8_t* dst) { // Vertical-Left 353 const int A = dst[0 - BPS]; 354 const int B = dst[1 - BPS]; 355 const int C = dst[2 - BPS]; 356 const int D = dst[3 - BPS]; 357 const int E = dst[4 - BPS]; 358 const int F = dst[5 - BPS]; 359 const int G = dst[6 - BPS]; 360 const int H = dst[7 - BPS]; 361 DST(0, 0) = AVG2(A, B); 362 DST(1, 0) = DST(0, 2) = AVG2(B, C); 363 DST(2, 0) = DST(1, 2) = AVG2(C, D); 364 DST(3, 0) = DST(2, 2) = AVG2(D, E); 365 366 DST(0, 1) = AVG3(A, B, C); 367 DST(1, 1) = DST(0, 3) = AVG3(B, C, D); 368 DST(2, 1) = DST(1, 3) = AVG3(C, D, E); 369 DST(3, 1) = DST(2, 3) = AVG3(D, E, F); 370 DST(3, 2) = AVG3(E, F, G); 371 DST(3, 3) = AVG3(F, G, H); 372 } 373 374 static void HU4_C(uint8_t* dst) { // Horizontal-Up 375 const int I = dst[-1 + 0 * BPS]; 376 const int J = dst[-1 + 1 * BPS]; 377 const int K = dst[-1 + 2 * BPS]; 378 const int L = dst[-1 + 3 * BPS]; 379 DST(0, 0) = AVG2(I, J); 380 DST(2, 0) = DST(0, 1) = AVG2(J, K); 381 DST(2, 1) = DST(0, 2) = AVG2(K, L); 382 DST(1, 0) = AVG3(I, J, K); 383 DST(3, 0) = DST(1, 1) = AVG3(J, K, L); 384 DST(3, 1) = DST(1, 2) = AVG3(K, L, L); 385 DST(3, 2) = DST(2, 2) = 386 DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; 387 } 388 389 static void HD4_C(uint8_t* dst) { // Horizontal-Down 390 const int I = dst[-1 + 0 * BPS]; 391 const int J = dst[-1 + 1 * BPS]; 392 const int K = dst[-1 + 2 * BPS]; 393 const int L = dst[-1 + 3 * BPS]; 394 const int X = dst[-1 - BPS]; 395 const int A = dst[0 - BPS]; 396 const int B = dst[1 - BPS]; 397 const int C = dst[2 - BPS]; 398 399 DST(0, 0) = DST(2, 1) = AVG2(I, X); 400 DST(0, 1) = DST(2, 2) = AVG2(J, I); 401 DST(0, 2) = DST(2, 3) = AVG2(K, J); 402 DST(0, 3) = AVG2(L, K); 403 404 DST(3, 0) = AVG3(A, B, C); 405 DST(2, 0) = AVG3(X, A, B); 406 DST(1, 0) = DST(3, 1) = AVG3(I, X, A); 407 DST(1, 1) = DST(3, 2) = AVG3(J, I, X); 408 DST(1, 2) = DST(3, 3) = AVG3(K, J, I); 409 DST(1, 3) = AVG3(L, K, J); 410 } 411 412 #undef DST 413 #undef AVG3 414 #undef AVG2 415 416 VP8PredFunc VP8PredLuma4[NUM_BMODES]; 417 418 //------------------------------------------------------------------------------ 419 // Chroma 420 421 #if !WEBP_NEON_OMIT_C_CODE 422 static void VE8uv_C(uint8_t* dst) { // vertical 423 int j; 424 for (j = 0; j < 8; ++j) { 425 memcpy(dst + j * BPS, dst - BPS, 8); 426 } 427 } 428 429 static void HE8uv_C(uint8_t* dst) { // horizontal 430 int j; 431 for (j = 0; j < 8; ++j) { 432 memset(dst, dst[-1], 8); 433 dst += BPS; 434 } 435 } 436 437 // helper for chroma-DC predictions 438 static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) { 439 int j; 440 for (j = 0; j < 8; ++j) { 441 memset(dst + j * BPS, value, 8); 442 } 443 } 444 445 static void DC8uv_C(uint8_t* dst) { // DC 446 int dc0 = 8; 447 int i; 448 for (i = 0; i < 8; ++i) { 449 dc0 += dst[i - BPS] + dst[-1 + i * BPS]; 450 } 451 Put8x8uv(dc0 >> 4, dst); 452 } 453 454 static void DC8uvNoLeft_C(uint8_t* dst) { // DC with no left samples 455 int dc0 = 4; 456 int i; 457 for (i = 0; i < 8; ++i) { 458 dc0 += dst[i - BPS]; 459 } 460 Put8x8uv(dc0 >> 3, dst); 461 } 462 463 static void DC8uvNoTop_C(uint8_t* dst) { // DC with no top samples 464 int dc0 = 4; 465 int i; 466 for (i = 0; i < 8; ++i) { 467 dc0 += dst[-1 + i * BPS]; 468 } 469 Put8x8uv(dc0 >> 3, dst); 470 } 471 472 static void DC8uvNoTopLeft_C(uint8_t* dst) { // DC with nothing 473 Put8x8uv(0x80, dst); 474 } 475 #endif // !WEBP_NEON_OMIT_C_CODE 476 477 VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES]; 478 479 //------------------------------------------------------------------------------ 480 // Edge filtering functions 481 482 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 483 // 4 pixels in, 2 pixels out 484 static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) { 485 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; 486 const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892] 487 const int a1 = VP8ksclip2[(a + 4) >> 3]; // in [-16,15] 488 const int a2 = VP8ksclip2[(a + 3) >> 3]; 489 p[-step] = VP8kclip1[p0 + a2]; 490 p[ 0] = VP8kclip1[q0 - a1]; 491 } 492 493 // 4 pixels in, 4 pixels out 494 static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) { 495 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; 496 const int a = 3 * (q0 - p0); 497 const int a1 = VP8ksclip2[(a + 4) >> 3]; 498 const int a2 = VP8ksclip2[(a + 3) >> 3]; 499 const int a3 = (a1 + 1) >> 1; 500 p[-2*step] = VP8kclip1[p1 + a3]; 501 p[- step] = VP8kclip1[p0 + a2]; 502 p[ 0] = VP8kclip1[q0 - a1]; 503 p[ step] = VP8kclip1[q1 - a3]; 504 } 505 506 // 6 pixels in, 6 pixels out 507 static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) { 508 const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step]; 509 const int q0 = p[0], q1 = p[step], q2 = p[2*step]; 510 const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]]; 511 // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9] 512 const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7 513 const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7 514 const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7 515 p[-3*step] = VP8kclip1[p2 + a3]; 516 p[-2*step] = VP8kclip1[p1 + a2]; 517 p[- step] = VP8kclip1[p0 + a1]; 518 p[ 0] = VP8kclip1[q0 - a1]; 519 p[ step] = VP8kclip1[q1 - a2]; 520 p[ 2*step] = VP8kclip1[q2 - a3]; 521 } 522 523 static WEBP_INLINE int Hev(const uint8_t* p, int step, int thresh) { 524 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; 525 return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh); 526 } 527 #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 528 529 #if !WEBP_NEON_OMIT_C_CODE 530 static WEBP_INLINE int NeedsFilter_C(const uint8_t* p, int step, int t) { 531 const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step]; 532 return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t); 533 } 534 #endif // !WEBP_NEON_OMIT_C_CODE 535 536 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 537 static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p, 538 int step, int t, int it) { 539 const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step]; 540 const int p0 = p[-step], q0 = p[0]; 541 const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step]; 542 if ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) > t) return 0; 543 return VP8kabs0[p3 - p2] <= it && VP8kabs0[p2 - p1] <= it && 544 VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it && 545 VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it; 546 } 547 #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 548 549 //------------------------------------------------------------------------------ 550 // Simple In-loop filtering (Paragraph 15.2) 551 552 #if !WEBP_NEON_OMIT_C_CODE 553 static void SimpleVFilter16_C(uint8_t* p, int stride, int thresh) { 554 int i; 555 const int thresh2 = 2 * thresh + 1; 556 for (i = 0; i < 16; ++i) { 557 if (NeedsFilter_C(p + i, stride, thresh2)) { 558 DoFilter2_C(p + i, stride); 559 } 560 } 561 } 562 563 static void SimpleHFilter16_C(uint8_t* p, int stride, int thresh) { 564 int i; 565 const int thresh2 = 2 * thresh + 1; 566 for (i = 0; i < 16; ++i) { 567 if (NeedsFilter_C(p + i * stride, 1, thresh2)) { 568 DoFilter2_C(p + i * stride, 1); 569 } 570 } 571 } 572 573 static void SimpleVFilter16i_C(uint8_t* p, int stride, int thresh) { 574 int k; 575 for (k = 3; k > 0; --k) { 576 p += 4 * stride; 577 SimpleVFilter16_C(p, stride, thresh); 578 } 579 } 580 581 static void SimpleHFilter16i_C(uint8_t* p, int stride, int thresh) { 582 int k; 583 for (k = 3; k > 0; --k) { 584 p += 4; 585 SimpleHFilter16_C(p, stride, thresh); 586 } 587 } 588 #endif // !WEBP_NEON_OMIT_C_CODE 589 590 //------------------------------------------------------------------------------ 591 // Complex In-loop filtering (Paragraph 15.3) 592 593 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 594 static WEBP_INLINE void FilterLoop26_C(uint8_t* p, 595 int hstride, int vstride, int size, 596 int thresh, int ithresh, 597 int hev_thresh) { 598 const int thresh2 = 2 * thresh + 1; 599 while (size-- > 0) { 600 if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) { 601 if (Hev(p, hstride, hev_thresh)) { 602 DoFilter2_C(p, hstride); 603 } else { 604 DoFilter6_C(p, hstride); 605 } 606 } 607 p += vstride; 608 } 609 } 610 611 static WEBP_INLINE void FilterLoop24_C(uint8_t* p, 612 int hstride, int vstride, int size, 613 int thresh, int ithresh, 614 int hev_thresh) { 615 const int thresh2 = 2 * thresh + 1; 616 while (size-- > 0) { 617 if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) { 618 if (Hev(p, hstride, hev_thresh)) { 619 DoFilter2_C(p, hstride); 620 } else { 621 DoFilter4_C(p, hstride); 622 } 623 } 624 p += vstride; 625 } 626 } 627 #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 628 629 #if !WEBP_NEON_OMIT_C_CODE 630 // on macroblock edges 631 static void VFilter16_C(uint8_t* p, int stride, 632 int thresh, int ithresh, int hev_thresh) { 633 FilterLoop26_C(p, stride, 1, 16, thresh, ithresh, hev_thresh); 634 } 635 636 static void HFilter16_C(uint8_t* p, int stride, 637 int thresh, int ithresh, int hev_thresh) { 638 FilterLoop26_C(p, 1, stride, 16, thresh, ithresh, hev_thresh); 639 } 640 641 // on three inner edges 642 static void VFilter16i_C(uint8_t* p, int stride, 643 int thresh, int ithresh, int hev_thresh) { 644 int k; 645 for (k = 3; k > 0; --k) { 646 p += 4 * stride; 647 FilterLoop24_C(p, stride, 1, 16, thresh, ithresh, hev_thresh); 648 } 649 } 650 #endif // !WEBP_NEON_OMIT_C_CODE 651 652 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 653 static void HFilter16i_C(uint8_t* p, int stride, 654 int thresh, int ithresh, int hev_thresh) { 655 int k; 656 for (k = 3; k > 0; --k) { 657 p += 4; 658 FilterLoop24_C(p, 1, stride, 16, thresh, ithresh, hev_thresh); 659 } 660 } 661 #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 662 663 #if !WEBP_NEON_OMIT_C_CODE 664 // 8-pixels wide variant, for chroma filtering 665 static void VFilter8_C(uint8_t* u, uint8_t* v, int stride, 666 int thresh, int ithresh, int hev_thresh) { 667 FilterLoop26_C(u, stride, 1, 8, thresh, ithresh, hev_thresh); 668 FilterLoop26_C(v, stride, 1, 8, thresh, ithresh, hev_thresh); 669 } 670 #endif // !WEBP_NEON_OMIT_C_CODE 671 672 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 673 static void HFilter8_C(uint8_t* u, uint8_t* v, int stride, 674 int thresh, int ithresh, int hev_thresh) { 675 FilterLoop26_C(u, 1, stride, 8, thresh, ithresh, hev_thresh); 676 FilterLoop26_C(v, 1, stride, 8, thresh, ithresh, hev_thresh); 677 } 678 #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 679 680 #if !WEBP_NEON_OMIT_C_CODE 681 static void VFilter8i_C(uint8_t* u, uint8_t* v, int stride, 682 int thresh, int ithresh, int hev_thresh) { 683 FilterLoop24_C(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh); 684 FilterLoop24_C(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh); 685 } 686 #endif // !WEBP_NEON_OMIT_C_CODE 687 688 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 689 static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride, 690 int thresh, int ithresh, int hev_thresh) { 691 FilterLoop24_C(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh); 692 FilterLoop24_C(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh); 693 } 694 #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 695 696 //------------------------------------------------------------------------------ 697 698 static void DitherCombine8x8_C(const uint8_t* dither, uint8_t* dst, 699 int dst_stride) { 700 int i, j; 701 for (j = 0; j < 8; ++j) { 702 for (i = 0; i < 8; ++i) { 703 const int delta0 = dither[i] - VP8_DITHER_AMP_CENTER; 704 const int delta1 = 705 (delta0 + VP8_DITHER_DESCALE_ROUNDER) >> VP8_DITHER_DESCALE; 706 dst[i] = clip_8b((int)dst[i] + delta1); 707 } 708 dst += dst_stride; 709 dither += 8; 710 } 711 } 712 713 //------------------------------------------------------------------------------ 714 715 VP8DecIdct2 VP8Transform; 716 VP8DecIdct VP8TransformAC3; 717 VP8DecIdct VP8TransformUV; 718 VP8DecIdct VP8TransformDC; 719 VP8DecIdct VP8TransformDCUV; 720 721 VP8LumaFilterFunc VP8VFilter16; 722 VP8LumaFilterFunc VP8HFilter16; 723 VP8ChromaFilterFunc VP8VFilter8; 724 VP8ChromaFilterFunc VP8HFilter8; 725 VP8LumaFilterFunc VP8VFilter16i; 726 VP8LumaFilterFunc VP8HFilter16i; 727 VP8ChromaFilterFunc VP8VFilter8i; 728 VP8ChromaFilterFunc VP8HFilter8i; 729 VP8SimpleFilterFunc VP8SimpleVFilter16; 730 VP8SimpleFilterFunc VP8SimpleHFilter16; 731 VP8SimpleFilterFunc VP8SimpleVFilter16i; 732 VP8SimpleFilterFunc VP8SimpleHFilter16i; 733 734 void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst, 735 int dst_stride); 736 737 extern void VP8DspInitSSE2(void); 738 extern void VP8DspInitSSE41(void); 739 extern void VP8DspInitNEON(void); 740 extern void VP8DspInitMIPS32(void); 741 extern void VP8DspInitMIPSdspR2(void); 742 extern void VP8DspInitMSA(void); 743 744 static volatile VP8CPUInfo dec_last_cpuinfo_used = 745 (VP8CPUInfo)&dec_last_cpuinfo_used; 746 747 WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) { 748 if (dec_last_cpuinfo_used == VP8GetCPUInfo) return; 749 750 VP8InitClipTables(); 751 752 #if !WEBP_NEON_OMIT_C_CODE 753 VP8TransformWHT = TransformWHT_C; 754 VP8Transform = TransformTwo_C; 755 VP8TransformDC = TransformDC_C; 756 VP8TransformAC3 = TransformAC3_C; 757 #endif 758 VP8TransformUV = TransformUV_C; 759 VP8TransformDCUV = TransformDCUV_C; 760 761 #if !WEBP_NEON_OMIT_C_CODE 762 VP8VFilter16 = VFilter16_C; 763 VP8VFilter16i = VFilter16i_C; 764 VP8HFilter16 = HFilter16_C; 765 VP8VFilter8 = VFilter8_C; 766 VP8VFilter8i = VFilter8i_C; 767 VP8SimpleVFilter16 = SimpleVFilter16_C; 768 VP8SimpleHFilter16 = SimpleHFilter16_C; 769 VP8SimpleVFilter16i = SimpleVFilter16i_C; 770 VP8SimpleHFilter16i = SimpleHFilter16i_C; 771 #endif 772 773 #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC 774 VP8HFilter16i = HFilter16i_C; 775 VP8HFilter8 = HFilter8_C; 776 VP8HFilter8i = HFilter8i_C; 777 #endif 778 779 #if !WEBP_NEON_OMIT_C_CODE 780 VP8PredLuma4[0] = DC4_C; 781 VP8PredLuma4[1] = TM4_C; 782 VP8PredLuma4[2] = VE4_C; 783 VP8PredLuma4[4] = RD4_C; 784 VP8PredLuma4[6] = LD4_C; 785 #endif 786 787 VP8PredLuma4[3] = HE4_C; 788 VP8PredLuma4[5] = VR4_C; 789 VP8PredLuma4[7] = VL4_C; 790 VP8PredLuma4[8] = HD4_C; 791 VP8PredLuma4[9] = HU4_C; 792 793 #if !WEBP_NEON_OMIT_C_CODE 794 VP8PredLuma16[0] = DC16_C; 795 VP8PredLuma16[1] = TM16_C; 796 VP8PredLuma16[2] = VE16_C; 797 VP8PredLuma16[3] = HE16_C; 798 VP8PredLuma16[4] = DC16NoTop_C; 799 VP8PredLuma16[5] = DC16NoLeft_C; 800 VP8PredLuma16[6] = DC16NoTopLeft_C; 801 802 VP8PredChroma8[0] = DC8uv_C; 803 VP8PredChroma8[1] = TM8uv_C; 804 VP8PredChroma8[2] = VE8uv_C; 805 VP8PredChroma8[3] = HE8uv_C; 806 VP8PredChroma8[4] = DC8uvNoTop_C; 807 VP8PredChroma8[5] = DC8uvNoLeft_C; 808 VP8PredChroma8[6] = DC8uvNoTopLeft_C; 809 #endif 810 811 VP8DitherCombine8x8 = DitherCombine8x8_C; 812 813 // If defined, use CPUInfo() to overwrite some pointers with faster versions. 814 if (VP8GetCPUInfo != NULL) { 815 #if defined(WEBP_USE_SSE2) 816 if (VP8GetCPUInfo(kSSE2)) { 817 VP8DspInitSSE2(); 818 #if defined(WEBP_USE_SSE41) 819 if (VP8GetCPUInfo(kSSE4_1)) { 820 VP8DspInitSSE41(); 821 } 822 #endif 823 } 824 #endif 825 #if defined(WEBP_USE_MIPS32) 826 if (VP8GetCPUInfo(kMIPS32)) { 827 VP8DspInitMIPS32(); 828 } 829 #endif 830 #if defined(WEBP_USE_MIPS_DSP_R2) 831 if (VP8GetCPUInfo(kMIPSdspR2)) { 832 VP8DspInitMIPSdspR2(); 833 } 834 #endif 835 #if defined(WEBP_USE_MSA) 836 if (VP8GetCPUInfo(kMSA)) { 837 VP8DspInitMSA(); 838 } 839 #endif 840 } 841 842 #if defined(WEBP_USE_NEON) 843 if (WEBP_NEON_OMIT_C_CODE || 844 (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { 845 VP8DspInitNEON(); 846 } 847 #endif 848 849 assert(VP8TransformWHT != NULL); 850 assert(VP8Transform != NULL); 851 assert(VP8TransformDC != NULL); 852 assert(VP8TransformAC3 != NULL); 853 assert(VP8TransformUV != NULL); 854 assert(VP8TransformDCUV != NULL); 855 assert(VP8VFilter16 != NULL); 856 assert(VP8HFilter16 != NULL); 857 assert(VP8VFilter8 != NULL); 858 assert(VP8HFilter8 != NULL); 859 assert(VP8VFilter16i != NULL); 860 assert(VP8HFilter16i != NULL); 861 assert(VP8VFilter8i != NULL); 862 assert(VP8HFilter8i != NULL); 863 assert(VP8SimpleVFilter16 != NULL); 864 assert(VP8SimpleHFilter16 != NULL); 865 assert(VP8SimpleVFilter16i != NULL); 866 assert(VP8SimpleHFilter16i != NULL); 867 assert(VP8PredLuma4[0] != NULL); 868 assert(VP8PredLuma4[1] != NULL); 869 assert(VP8PredLuma4[2] != NULL); 870 assert(VP8PredLuma4[3] != NULL); 871 assert(VP8PredLuma4[4] != NULL); 872 assert(VP8PredLuma4[5] != NULL); 873 assert(VP8PredLuma4[6] != NULL); 874 assert(VP8PredLuma4[7] != NULL); 875 assert(VP8PredLuma4[8] != NULL); 876 assert(VP8PredLuma4[9] != NULL); 877 assert(VP8PredLuma16[0] != NULL); 878 assert(VP8PredLuma16[1] != NULL); 879 assert(VP8PredLuma16[2] != NULL); 880 assert(VP8PredLuma16[3] != NULL); 881 assert(VP8PredLuma16[4] != NULL); 882 assert(VP8PredLuma16[5] != NULL); 883 assert(VP8PredLuma16[6] != NULL); 884 assert(VP8PredChroma8[0] != NULL); 885 assert(VP8PredChroma8[1] != NULL); 886 assert(VP8PredChroma8[2] != NULL); 887 assert(VP8PredChroma8[3] != NULL); 888 assert(VP8PredChroma8[4] != NULL); 889 assert(VP8PredChroma8[5] != NULL); 890 assert(VP8PredChroma8[6] != NULL); 891 assert(VP8DitherCombine8x8 != NULL); 892 893 dec_last_cpuinfo_used = VP8GetCPUInfo; 894 } 895