1 // Copyright 2010 Google Inc. 2 // 3 // This code is licensed under the same terms as WebM: 4 // Software License Agreement: http://www.webmproject.org/license/software/ 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ 6 // ----------------------------------------------------------------------------- 7 // 8 // Frame-reconstruction function. Memory allocation. 9 // 10 // Author: Skal (pascal.massimino (at) gmail.com) 11 12 #include <stdlib.h> 13 #include "./vp8i.h" 14 15 #if defined(__cplusplus) || defined(c_plusplus) 16 extern "C" { 17 #endif 18 19 #define ALIGN_MASK (32 - 1) 20 21 //------------------------------------------------------------------------------ 22 // Memory setup 23 24 // kFilterExtraRows[] = How many extra lines are needed on the MB boundary 25 // for caching, given a filtering level. 26 // Simple filter: up to 2 luma samples are read and 1 is written. 27 // Complex filter: up to 4 luma samples are read and 3 are written. Same for 28 // U/V, so it's 8 samples total (because of the 2x upsampling). 29 static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 }; 30 31 int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) { 32 const int mb_w = dec->mb_w_; 33 const int intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t); 34 const int top_size = (16 + 8 + 8) * mb_w; 35 const int info_size = (mb_w + 1) * sizeof(VP8MB); 36 const int yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_); 37 const int coeffs_size = 384 * sizeof(*dec->coeffs_); 38 const int cache_height = (16 + kFilterExtraRows[dec->filter_type_]) * 3 / 2; 39 const int cache_size = top_size * cache_height; 40 const int alpha_size = 41 dec->alpha_data_ ? 
(dec->pic_hdr_.width_ * dec->pic_hdr_.height_) : 0; 42 const int needed = intra_pred_mode_size 43 + top_size + info_size 44 + yuv_size + coeffs_size 45 + cache_size + alpha_size + ALIGN_MASK; 46 uint8_t* mem; 47 48 if (needed > dec->mem_size_) { 49 free(dec->mem_); 50 dec->mem_size_ = 0; 51 dec->mem_ = (uint8_t*)malloc(needed); 52 if (dec->mem_ == NULL) { 53 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY, 54 "no memory during frame initialization."); 55 } 56 dec->mem_size_ = needed; 57 } 58 59 mem = (uint8_t*)dec->mem_; 60 dec->intra_t_ = (uint8_t*)mem; 61 mem += intra_pred_mode_size; 62 63 dec->y_t_ = (uint8_t*)mem; 64 mem += 16 * mb_w; 65 dec->u_t_ = (uint8_t*)mem; 66 mem += 8 * mb_w; 67 dec->v_t_ = (uint8_t*)mem; 68 mem += 8 * mb_w; 69 70 dec->mb_info_ = ((VP8MB*)mem) + 1; 71 mem += info_size; 72 73 mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK); 74 assert((yuv_size & ALIGN_MASK) == 0); 75 dec->yuv_b_ = (uint8_t*)mem; 76 mem += yuv_size; 77 78 dec->coeffs_ = (int16_t*)mem; 79 mem += coeffs_size; 80 81 dec->cache_y_stride_ = 16 * mb_w; 82 dec->cache_uv_stride_ = 8 * mb_w; 83 { 84 const int extra_rows = kFilterExtraRows[dec->filter_type_]; 85 const int extra_y = extra_rows * dec->cache_y_stride_; 86 const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_; 87 dec->cache_y_ = ((uint8_t*)mem) + extra_y; 88 dec->cache_u_ = dec->cache_y_ + 16 * dec->cache_y_stride_ + extra_uv; 89 dec->cache_v_ = dec->cache_u_ + 8 * dec->cache_uv_stride_ + extra_uv; 90 } 91 mem += cache_size; 92 93 // alpha plane 94 dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL; 95 mem += alpha_size; 96 97 // note: left-info is initialized once for all. 
98 memset(dec->mb_info_ - 1, 0, (mb_w + 1) * sizeof(*dec->mb_info_)); 99 100 // initialize top 101 memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size); 102 103 // prepare 'io' 104 io->mb_y = 0; 105 io->y = dec->cache_y_; 106 io->u = dec->cache_u_; 107 io->v = dec->cache_v_; 108 io->y_stride = dec->cache_y_stride_; 109 io->uv_stride = dec->cache_uv_stride_; 110 io->fancy_upsampling = 0; // default 111 io->a = NULL; 112 113 // Init critical function pointers and look-up tables. 114 VP8DspInitTables(); 115 VP8DspInit(); 116 117 return 1; 118 } 119 120 //------------------------------------------------------------------------------ 121 // Filtering 122 123 static inline int hev_thresh_from_level(int level, int keyframe) { 124 if (keyframe) { 125 return (level >= 40) ? 2 : (level >= 15) ? 1 : 0; 126 } else { 127 return (level >= 40) ? 3 : (level >= 20) ? 2 : (level >= 15) ? 1 : 0; 128 } 129 } 130 131 static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) { 132 VP8MB* const mb = dec->mb_info_ + mb_x; 133 uint8_t* const y_dst = dec->cache_y_ + mb_x * 16; 134 const int y_bps = dec->cache_y_stride_; 135 const int level = mb->f_level_; 136 const int ilevel = mb->f_ilevel_; 137 const int limit = 2 * level + ilevel; 138 if (level == 0) { 139 return; 140 } 141 if (dec->filter_type_ == 1) { // simple 142 if (mb_x > 0) { 143 VP8SimpleHFilter16(y_dst, y_bps, limit + 4); 144 } 145 if (mb->f_inner_) { 146 VP8SimpleHFilter16i(y_dst, y_bps, limit); 147 } 148 if (mb_y > 0) { 149 VP8SimpleVFilter16(y_dst, y_bps, limit + 4); 150 } 151 if (mb->f_inner_) { 152 VP8SimpleVFilter16i(y_dst, y_bps, limit); 153 } 154 } else { // complex 155 uint8_t* const u_dst = dec->cache_u_ + mb_x * 8; 156 uint8_t* const v_dst = dec->cache_v_ + mb_x * 8; 157 const int uv_bps = dec->cache_uv_stride_; 158 const int hev_thresh = 159 hev_thresh_from_level(level, dec->frm_hdr_.key_frame_); 160 if (mb_x > 0) { 161 VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh); 162 VP8HFilter8(u_dst, 
v_dst, uv_bps, limit + 4, ilevel, hev_thresh); 163 } 164 if (mb->f_inner_) { 165 VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh); 166 VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh); 167 } 168 if (mb_y > 0) { 169 VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh); 170 VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh); 171 } 172 if (mb->f_inner_) { 173 VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh); 174 VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh); 175 } 176 } 177 } 178 179 void VP8FilterRow(const VP8Decoder* const dec) { 180 int mb_x; 181 assert(dec->filter_type_ > 0); 182 if (dec->mb_y_ < dec->tl_mb_y_ || dec->mb_y_ > dec->br_mb_y_) { 183 return; 184 } 185 for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) { 186 DoFilter(dec, mb_x, dec->mb_y_); 187 } 188 } 189 190 //------------------------------------------------------------------------------ 191 192 void VP8StoreBlock(VP8Decoder* const dec) { 193 if (dec->filter_type_ > 0) { 194 VP8MB* const info = dec->mb_info_ + dec->mb_x_; 195 int level = dec->filter_levels_[dec->segment_]; 196 if (dec->filter_hdr_.use_lf_delta_) { 197 // TODO(skal): only CURRENT is handled for now. 198 level += dec->filter_hdr_.ref_lf_delta_[0]; 199 if (dec->is_i4x4_) { 200 level += dec->filter_hdr_.mode_lf_delta_[0]; 201 } 202 } 203 level = (level < 0) ? 0 : (level > 63) ? 63 : level; 204 info->f_level_ = level; 205 206 if (dec->filter_hdr_.sharpness_ > 0) { 207 if (dec->filter_hdr_.sharpness_ > 4) { 208 level >>= 2; 209 } else { 210 level >>= 1; 211 } 212 if (level > 9 - dec->filter_hdr_.sharpness_) { 213 level = 9 - dec->filter_hdr_.sharpness_; 214 } 215 } 216 217 info->f_ilevel_ = (level < 1) ? 
1 : level; 218 info->f_inner_ = (!info->skip_ || dec->is_i4x4_); 219 } 220 { 221 // Transfer samples to row cache 222 int y; 223 uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16; 224 uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8; 225 uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8; 226 for (y = 0; y < 16; ++y) { 227 memcpy(ydst + y * dec->cache_y_stride_, 228 dec->yuv_b_ + Y_OFF + y * BPS, 16); 229 } 230 for (y = 0; y < 8; ++y) { 231 memcpy(udst + y * dec->cache_uv_stride_, 232 dec->yuv_b_ + U_OFF + y * BPS, 8); 233 memcpy(vdst + y * dec->cache_uv_stride_, 234 dec->yuv_b_ + V_OFF + y * BPS, 8); 235 } 236 } 237 } 238 239 //------------------------------------------------------------------------------ 240 // This function is called after a row of macroblocks is finished decoding. 241 // It also takes into account the following restrictions: 242 // * In case of in-loop filtering, we must hold off sending some of the bottom 243 // pixels as they are yet unfiltered. They will be when the next macroblock 244 // row is decoded. Meanwhile, we must preserve them by rotating them in the 245 // cache area. This doesn't hold for the very bottom row of the uncropped 246 // picture of course. 247 // * we must clip the remaining pixels against the cropping area. 
// The VP8Io
// struct must have the following fields set correctly before calling put():

#define MACROBLOCK_VPOS(mb_y)  ((mb_y) * 16)    // vertical position of a MB

// Emits the freshly decoded (and filtered) row of pixels through io->put(),
// clipped to the cropping rectangle, then rotates the unfiltered bottom
// spill rows to the top of the cache for the next row. Returns 1 on success,
// 0 if io->put() aborted, or the VP8SetError() result on alpha failure.
int VP8FinishRow(VP8Decoder* const dec, VP8Io* io) {
  const int extra_y_rows = kFilterExtraRows[dec->filter_type_];
  const int ysize = extra_y_rows * dec->cache_y_stride_;
  const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;
  // ydst/udst/vdst point at the spill rows just above the cache proper.
  uint8_t* const ydst = dec->cache_y_ - ysize;
  uint8_t* const udst = dec->cache_u_ - uvsize;
  uint8_t* const vdst = dec->cache_v_ - uvsize;
  const int first_row = (dec->mb_y_ == 0);
  const int last_row = (dec->mb_y_ >= dec->br_mb_y_ - 1);
  int y_start = MACROBLOCK_VPOS(dec->mb_y_);
  int y_end = MACROBLOCK_VPOS(dec->mb_y_ + 1);
  if (io->put) {
    if (!first_row) {
      // Include the rows held back from the previous call (now filtered).
      y_start -= extra_y_rows;
      io->y = ydst;
      io->u = udst;
      io->v = vdst;
    } else {
      io->y = dec->cache_y_;
      io->u = dec->cache_u_;
      io->v = dec->cache_v_;
    }

    if (!last_row) {
      // Hold back the bottom rows: they are not yet filtered.
      y_end -= extra_y_rows;
    }
    if (y_end > io->crop_bottom) {
      y_end = io->crop_bottom;    // make sure we don't overflow on last row.
    }
    io->a = NULL;
#ifdef WEBP_EXPERIMENTAL_FEATURES
    if (dec->alpha_data_) {
      io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start);
      if (io->a == NULL) {
        return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
                           "Could not decode alpha data.");
      }
    }
#endif
    if (y_start < io->crop_top) {
      // Skip rows above the crop window; chroma advances by half as much.
      const int delta_y = io->crop_top - y_start;
      y_start = io->crop_top;
      assert(!(delta_y & 1));   // crop_top is even, so chroma stays aligned
      io->y += dec->cache_y_stride_ * delta_y;
      io->u += dec->cache_uv_stride_ * (delta_y >> 1);
      io->v += dec->cache_uv_stride_ * (delta_y >> 1);
      if (io->a) {
        io->a += io->width * delta_y;   // alpha rows are io->width apart
      }
    }
    if (y_start < y_end) {
      // Apply left cropping and describe the emitted band in 'io'.
      io->y += io->crop_left;
      io->u += io->crop_left >> 1;
      io->v += io->crop_left >> 1;
      if (io->a) {
        io->a += io->crop_left;
      }
      io->mb_y = y_start - io->crop_top;
      io->mb_w = io->crop_right - io->crop_left;
      io->mb_h = y_end - y_start;
      if (!io->put(io)) {
        return 0;   // user requested abort
      }
    }
  }
  // rotate top samples
  if (!last_row) {
    memcpy(ydst, ydst + 16 * dec->cache_y_stride_, ysize);
    memcpy(udst, udst + 8 * dec->cache_uv_stride_, uvsize);
    memcpy(vdst, vdst + 8 * dec->cache_uv_stride_, uvsize);
  }
  return 1;
}

#undef MACROBLOCK_VPOS

//------------------------------------------------------------------------------
// Finish setting up the decoding parameter once user's setup() is called.

// Runs the user's io->setup() hook, honors bypass_filtering, and computes
// the macroblock area [tl_mb_, br_mb_) that actually needs in-loop
// filtering given the cropping rectangle. Returns VP8_STATUS_OK or the
// decoder's error status if setup() failed.
VP8StatusCode VP8FinishFrameSetup(VP8Decoder* const dec, VP8Io* const io) {
  // Call setup() first. This may trigger additional decoding features on 'io'.
  if (io->setup && !io->setup(io)) {
    VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");
    return dec->status_;
  }

  // Disable filtering per user request
  if (io->bypass_filtering) {
    dec->filter_type_ = 0;
  }
  // TODO(skal): filter type / strength / sharpness forcing

  // Define the area where we can skip in-loop filtering, in case of cropping.
  //
  // 'Simple' filter reads two luma samples outside of the macroblock and
  // and filters one. It doesn't filter the chroma samples. Hence, we can
  // avoid doing the in-loop filtering before crop_top/crop_left position.
  // For the 'Complex' filter, 3 samples are read and up to 3 are filtered.
  // Means: there's a dependency chain that goes all the way up to the
  // top-left corner of the picture (MB #0). We must filter all the previous
  // macroblocks.
  // TODO(skal): add an 'approximate_decoding' option, that won't produce
  // a 1:1 bit-exactness for complex filtering?
  {
    const int extra_pixels = kFilterExtraRows[dec->filter_type_];
    if (dec->filter_type_ == 2) {
      // For complex filter, we need to preserve the dependency chain.
      dec->tl_mb_x_ = 0;
      dec->tl_mb_y_ = 0;
    } else {
      // For simple filter, we can filter only the cropped region.
      dec->tl_mb_y_ = io->crop_top >> 4;
      dec->tl_mb_x_ = io->crop_left >> 4;
    }
    // We need some 'extra' pixels on the right/bottom.
    dec->br_mb_y_ = (io->crop_bottom + 15 + extra_pixels) >> 4;
    dec->br_mb_x_ = (io->crop_right + 15 + extra_pixels) >> 4;
    if (dec->br_mb_x_ > dec->mb_w_) {
      dec->br_mb_x_ = dec->mb_w_;
    }
    if (dec->br_mb_y_ > dec->mb_h_) {
      dec->br_mb_y_ = dec->mb_h_;
    }
  }
  return VP8_STATUS_OK;
}

//------------------------------------------------------------------------------
// Main reconstruction function.
381 382 static const int kScan[16] = { 383 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, 384 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, 385 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, 386 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS 387 }; 388 389 static inline int CheckMode(VP8Decoder* const dec, int mode) { 390 if (mode == B_DC_PRED) { 391 if (dec->mb_x_ == 0) { 392 return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT; 393 } else { 394 return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOP : B_DC_PRED; 395 } 396 } 397 return mode; 398 } 399 400 static inline void Copy32b(uint8_t* dst, uint8_t* src) { 401 *(uint32_t*)dst = *(uint32_t*)src; 402 } 403 404 void VP8ReconstructBlock(VP8Decoder* const dec) { 405 uint8_t* const y_dst = dec->yuv_b_ + Y_OFF; 406 uint8_t* const u_dst = dec->yuv_b_ + U_OFF; 407 uint8_t* const v_dst = dec->yuv_b_ + V_OFF; 408 409 // Rotate in the left samples from previously decoded block. We move four 410 // pixels at a time for alignment reason, and because of in-loop filter. 
411 if (dec->mb_x_ > 0) { 412 int j; 413 for (j = -1; j < 16; ++j) { 414 Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]); 415 } 416 for (j = -1; j < 8; ++j) { 417 Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]); 418 Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]); 419 } 420 } else { 421 int j; 422 for (j = 0; j < 16; ++j) { 423 y_dst[j * BPS - 1] = 129; 424 } 425 for (j = 0; j < 8; ++j) { 426 u_dst[j * BPS - 1] = 129; 427 v_dst[j * BPS - 1] = 129; 428 } 429 // Init top-left sample on left column too 430 if (dec->mb_y_ > 0) { 431 y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129; 432 } 433 } 434 { 435 // bring top samples into the cache 436 uint8_t* const top_y = dec->y_t_ + dec->mb_x_ * 16; 437 uint8_t* const top_u = dec->u_t_ + dec->mb_x_ * 8; 438 uint8_t* const top_v = dec->v_t_ + dec->mb_x_ * 8; 439 const int16_t* coeffs = dec->coeffs_; 440 int n; 441 442 if (dec->mb_y_ > 0) { 443 memcpy(y_dst - BPS, top_y, 16); 444 memcpy(u_dst - BPS, top_u, 8); 445 memcpy(v_dst - BPS, top_v, 8); 446 } else if (dec->mb_x_ == 0) { 447 // we only need to do this init once at block (0,0). 448 // Afterward, it remains valid for the whole topmost row. 449 memset(y_dst - BPS - 1, 127, 16 + 4 + 1); 450 memset(u_dst - BPS - 1, 127, 8 + 1); 451 memset(v_dst - BPS - 1, 127, 8 + 1); 452 } 453 454 // predict and add residuals 455 456 if (dec->is_i4x4_) { // 4x4 457 uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16); 458 459 if (dec->mb_y_ > 0) { 460 if (dec->mb_x_ >= dec->mb_w_ - 1) { // on rightmost border 461 top_right[0] = top_y[15] * 0x01010101u; 462 } else { 463 memcpy(top_right, top_y + 16, sizeof(*top_right)); 464 } 465 } 466 // replicate the top-right pixels below 467 top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0]; 468 469 // predict and add residues for all 4x4 blocks in turn. 
470 for (n = 0; n < 16; n++) { 471 uint8_t* const dst = y_dst + kScan[n]; 472 VP8PredLuma4[dec->imodes_[n]](dst); 473 if (dec->non_zero_ac_ & (1 << n)) { 474 VP8Transform(coeffs + n * 16, dst, 0); 475 } else if (dec->non_zero_ & (1 << n)) { // only DC is present 476 VP8TransformDC(coeffs + n * 16, dst); 477 } 478 } 479 } else { // 16x16 480 const int pred_func = CheckMode(dec, dec->imodes_[0]); 481 VP8PredLuma16[pred_func](y_dst); 482 if (dec->non_zero_) { 483 for (n = 0; n < 16; n++) { 484 uint8_t* const dst = y_dst + kScan[n]; 485 if (dec->non_zero_ac_ & (1 << n)) { 486 VP8Transform(coeffs + n * 16, dst, 0); 487 } else if (dec->non_zero_ & (1 << n)) { // only DC is present 488 VP8TransformDC(coeffs + n * 16, dst); 489 } 490 } 491 } 492 } 493 { 494 // Chroma 495 const int pred_func = CheckMode(dec, dec->uvmode_); 496 VP8PredChroma8[pred_func](u_dst); 497 VP8PredChroma8[pred_func](v_dst); 498 499 if (dec->non_zero_ & 0x0f0000) { // chroma-U 500 const int16_t* const u_coeffs = dec->coeffs_ + 16 * 16; 501 if (dec->non_zero_ac_ & 0x0f0000) { 502 VP8TransformUV(u_coeffs, u_dst); 503 } else { 504 VP8TransformDCUV(u_coeffs, u_dst); 505 } 506 } 507 if (dec->non_zero_ & 0xf00000) { // chroma-V 508 const int16_t* const v_coeffs = dec->coeffs_ + 20 * 16; 509 if (dec->non_zero_ac_ & 0xf00000) { 510 VP8TransformUV(v_coeffs, v_dst); 511 } else { 512 VP8TransformDCUV(v_coeffs, v_dst); 513 } 514 } 515 516 // stash away top samples for next block 517 if (dec->mb_y_ < dec->mb_h_ - 1) { 518 memcpy(top_y, y_dst + 15 * BPS, 16); 519 memcpy(top_u, u_dst + 7 * BPS, 8); 520 memcpy(top_v, v_dst + 7 * BPS, 8); 521 } 522 } 523 } 524 } 525 526 //------------------------------------------------------------------------------ 527 528 #if defined(__cplusplus) || defined(c_plusplus) 529 } // extern "C" 530 #endif 531