1 // Copyright 2011 Google Inc. All Rights Reserved. 2 // 3 // This code is licensed under the same terms as WebM: 4 // Software License Agreement: http://www.webmproject.org/license/software/ 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ 6 // ----------------------------------------------------------------------------- 7 // 8 // WebP encoder: internal header. 9 // 10 // Author: Skal (pascal.massimino (at) gmail.com) 11 12 #ifndef WEBP_ENC_VP8ENCI_H_ 13 #define WEBP_ENC_VP8ENCI_H_ 14 15 #include <string.h> // for memcpy() 16 #include "../dsp/dsp.h" 17 #include "../utils/bit_writer.h" 18 #include "webp/encode.h" 19 20 #if defined(__cplusplus) || defined(c_plusplus) 21 extern "C" { 22 #endif 23 24 //------------------------------------------------------------------------------ 25 // Various defines and enums 26 27 // version numbers 28 #define ENC_MAJ_VERSION 0 29 #define ENC_MIN_VERSION 2 30 #define ENC_REV_VERSION 0 31 32 // size of histogram used by CollectHistogram. 33 #define MAX_COEFF_THRESH 64 34 35 // intra prediction modes 36 enum { B_DC_PRED = 0, // 4x4 modes 37 B_TM_PRED = 1, 38 B_VE_PRED = 2, 39 B_HE_PRED = 3, 40 B_RD_PRED = 4, 41 B_VR_PRED = 5, 42 B_LD_PRED = 6, 43 B_VL_PRED = 7, 44 B_HD_PRED = 8, 45 B_HU_PRED = 9, 46 NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED, // = 10 47 48 // Luma16 or UV modes 49 DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED, 50 H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED 51 }; 52 53 enum { NUM_MB_SEGMENTS = 4, 54 MAX_NUM_PARTITIONS = 8, 55 NUM_TYPES = 4, // 0: i16-AC, 1: i16-DC, 2:chroma-AC, 3:i4-AC 56 NUM_BANDS = 8, 57 NUM_CTX = 3, 58 NUM_PROBAS = 11, 59 MAX_LF_LEVELS = 64, // Maximum loop filter level 60 MAX_VARIABLE_LEVEL = 67 // last (inclusive) level with variable cost 61 }; 62 63 // YUV-cache parameters. Cache is 16-pixels wide. 64 // The original or reconstructed samples can be accessed using VP8Scan[] 65 // The predicted blocks can be accessed using offsets to yuv_p_ and 66 // the arrays VP8*ModeOffsets[]; 67 // +----+ YUV Samples area. See VP8Scan[] for accessing the blocks. 68 // Y_OFF |YYYY| <- original samples (enc->yuv_in_) 69 // |YYYY| 70 // |YYYY| 71 // |YYYY| 72 // U_OFF |UUVV| V_OFF (=U_OFF + 8) 73 // |UUVV| 74 // +----+ 75 // Y_OFF |YYYY| <- compressed/decoded samples ('yuv_out_') 76 // |YYYY| There are two buffers like this ('yuv_out_'/'yuv_out2_') 77 // |YYYY| 78 // |YYYY| 79 // U_OFF |UUVV| V_OFF 80 // |UUVV| 81 // x2 (for yuv_out2_) 82 // +----+ Prediction area ('yuv_p_', size = PRED_SIZE) 83 // I16DC16 |YYYY| Intra16 predictions (16x16 block each) 84 // |YYYY| 85 // |YYYY| 86 // |YYYY| 87 // I16TM16 |YYYY| 88 // |YYYY| 89 // |YYYY| 90 // |YYYY| 91 // I16VE16 |YYYY| 92 // |YYYY| 93 // |YYYY| 94 // |YYYY| 95 // I16HE16 |YYYY| 96 // |YYYY| 97 // |YYYY| 98 // |YYYY| 99 // +----+ Chroma U/V predictions (16x8 block each) 100 // C8DC8 |UUVV| 101 // |UUVV| 102 // C8TM8 |UUVV| 103 // |UUVV| 104 // C8VE8 |UUVV| 105 // |UUVV| 106 // C8HE8 |UUVV| 107 // |UUVV| 108 // +----+ Intra 4x4 predictions (4x4 block each) 109 // |YYYY| I4DC4 I4TM4 I4VE4 I4HE4 110 // |YYYY| I4RD4 I4VR4 I4LD4 I4VL4 111 // |YY..| I4HD4 I4HU4 I4TMP 112 // +----+ 113 #define BPS 16 // this is the common stride 114 #define Y_SIZE (BPS * 16) 115 #define UV_SIZE (BPS * 8) 116 #define YUV_SIZE (Y_SIZE + UV_SIZE) 117 #define PRED_SIZE (6 * 16 * BPS + 12 * BPS) 118 #define Y_OFF (0) 119 #define U_OFF (Y_SIZE) 120 #define V_OFF (U_OFF + 8) 121 #define ALIGN_CST 15 122 #define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST) 123 124 extern const int VP8Scan[16 + 4 + 4]; // in quant.c 125 extern const int VP8UVModeOffsets[4]; // in analyze.c 126 extern const int VP8I16ModeOffsets[4]; 127 extern const int VP8I4ModeOffsets[NUM_BMODES]; 128 129 // Layout of prediction blocks 130 // intra 16x16 131 #define I16DC16 (0 * 16 * BPS) 132 #define I16TM16 (1 * 16 * BPS) 133 #define I16VE16 (2 * 16 * BPS) 134 #define I16HE16 (3 * 16 * BPS) 135 // chroma 8x8, two U/V blocks side by side (hence: 16x8 each) 136 #define C8DC8 (4 * 16 * BPS) 137 #define C8TM8 (4 * 16 * BPS + 8 * BPS) 138 #define C8VE8 (5 * 16 * BPS) 139 #define C8HE8 (5 * 16 * BPS + 8 * BPS) 140 // intra 4x4 141 #define I4DC4 (6 * 16 * BPS + 0) 142 #define I4TM4 (6 * 16 * BPS + 4) 143 #define I4VE4 (6 * 16 * BPS + 8) 144 #define I4HE4 (6 * 16 * BPS + 12) 145 #define I4RD4 (6 * 16 * BPS + 4 * BPS + 0) 146 #define I4VR4 (6 * 16 * BPS + 4 * BPS + 4) 147 #define I4LD4 (6 * 16 * BPS + 4 * BPS + 8) 148 #define I4VL4 (6 * 16 * BPS + 4 * BPS + 12) 149 #define I4HD4 (6 * 16 * BPS + 8 * BPS + 0) 150 #define I4HU4 (6 * 16 * BPS + 8 * BPS + 4) 151 #define I4TMP (6 * 16 * BPS + 8 * BPS + 8) 152 153 typedef int64_t score_t; // type used for scores, rate, distortion 154 #define MAX_COST ((score_t)0x7fffffffffffffLL) 155 156 #define QFIX 17 157 #define BIAS(b) ((b) << (QFIX - 8)) 158 // Fun fact: this is the _only_ line where we're actually being lossy and 159 // discarding bits. 160 static WEBP_INLINE int QUANTDIV(int n, int iQ, int B) { 161 return (n * iQ + B) >> QFIX; 162 } 163 extern const uint8_t VP8Zigzag[16]; 164 165 //------------------------------------------------------------------------------ 166 // Headers 167 168 typedef uint32_t proba_t; // 16b + 16b 169 typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS]; 170 typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS]; 171 typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1]; 172 typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats 173 174 typedef struct VP8Encoder VP8Encoder; 175 176 // segment features 177 typedef struct { 178 int num_segments_; // Actual number of segments. 1 segment only = unused. 179 int update_map_; // whether to update the segment map or not. 180 // must be 0 if there's only 1 segment. 181 int size_; // bit-cost for transmitting the segment map 182 } VP8SegmentHeader; 183 184 // Struct collecting all frame-persistent probabilities. 185 typedef struct { 186 uint8_t segments_[3]; // probabilities for segment tree 187 uint8_t skip_proba_; // final probability of being skipped. 188 ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 924 bytes 189 StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes 190 CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 11.4k 191 int dirty_; // if true, need to call VP8CalculateLevelCosts() 192 int use_skip_proba_; // Note: we always use skip_proba for now. 193 int nb_skip_; // number of skipped blocks 194 } VP8Proba; 195 196 // Filter parameters. Not actually used in the code (we don't perform 197 // the in-loop filtering), but filled from user's config 198 typedef struct { 199 int simple_; // filtering type: 0=complex, 1=simple 200 int level_; // base filter level [0..63] 201 int sharpness_; // [0..7] 202 int i4x4_lf_delta_; // delta filter level for i4x4 relative to i16x16 203 } VP8FilterHeader; 204 205 //------------------------------------------------------------------------------ 206 // Informations about the macroblocks. 207 208 typedef struct { 209 // block type 210 unsigned int type_:2; // 0=i4x4, 1=i16x16 211 unsigned int uv_mode_:2; 212 unsigned int skip_:1; 213 unsigned int segment_:2; 214 uint8_t alpha_; // quantization-susceptibility 215 } VP8MBInfo; 216 217 typedef struct VP8Matrix { 218 uint16_t q_[16]; // quantizer steps 219 uint16_t iq_[16]; // reciprocals, fixed point. 220 uint16_t bias_[16]; // rounding bias 221 uint16_t zthresh_[16]; // value under which a coefficient is zeroed 222 uint16_t sharpen_[16]; // frequency boosters for slight sharpening 223 } VP8Matrix; 224 225 typedef struct { 226 VP8Matrix y1_, y2_, uv_; // quantization matrices 227 int alpha_; // quant-susceptibility, range [-127,127]. Zero is neutral. 228 // Lower values indicate a lower risk of blurriness. 229 int beta_; // filter-susceptibility, range [0,255]. 230 int quant_; // final segment quantizer. 231 int fstrength_; // final in-loop filtering strength 232 // reactivities 233 int lambda_i16_, lambda_i4_, lambda_uv_; 234 int lambda_mode_, lambda_trellis_, tlambda_; 235 int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_; 236 } VP8SegmentInfo; 237 238 // Handy transcient struct to accumulate score and info during RD-optimization 239 // and mode evaluation. 240 typedef struct { 241 score_t D, SD, R, score; // Distortion, spectral distortion, rate, score. 242 int16_t y_dc_levels[16]; // Quantized levels for luma-DC, luma-AC, chroma. 243 int16_t y_ac_levels[16][16]; 244 int16_t uv_levels[4 + 4][16]; 245 int mode_i16; // mode number for intra16 prediction 246 uint8_t modes_i4[16]; // mode numbers for intra4 predictions 247 int mode_uv; // mode number of chroma prediction 248 uint32_t nz; // non-zero blocks 249 } VP8ModeScore; 250 251 // Iterator structure to iterate through macroblocks, pointing to the 252 // right neighbouring data (samples, predictions, contexts, ...) 253 typedef struct { 254 int x_, y_; // current macroblock 255 int y_offset_, uv_offset_; // offset to the luma / chroma planes 256 int y_stride_, uv_stride_; // respective strides 257 uint8_t* yuv_in_; // borrowed from enc_ (for now) 258 uint8_t* yuv_out_; // '' 259 uint8_t* yuv_out2_; // '' 260 uint8_t* yuv_p_; // '' 261 VP8Encoder* enc_; // back-pointer 262 VP8MBInfo* mb_; // current macroblock 263 VP8BitWriter* bw_; // current bit-writer 264 uint8_t* preds_; // intra mode predictors (4x4 blocks) 265 uint32_t* nz_; // non-zero pattern 266 uint8_t i4_boundary_[37]; // 32+5 boundary samples needed by intra4x4 267 uint8_t* i4_top_; // pointer to the current top boundary sample 268 int i4_; // current intra4x4 mode being tested 269 int top_nz_[9]; // top-non-zero context. 270 int left_nz_[9]; // left-non-zero. left_nz[8] is independent. 271 uint64_t bit_count_[4][3]; // bit counters for coded levels. 272 uint64_t luma_bits_; // macroblock bit-cost for luma 273 uint64_t uv_bits_; // macroblock bit-cost for chroma 274 LFStats* lf_stats_; // filter stats (borrowed from enc_) 275 int do_trellis_; // if true, perform extra level optimisation 276 int done_; // true when scan is finished 277 int percent0_; // saved initial progress percent 278 } VP8EncIterator; 279 280 // in iterator.c 281 // must be called first. 282 void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it); 283 // restart a scan. 284 void VP8IteratorReset(VP8EncIterator* const it); 285 // import samples from source 286 void VP8IteratorImport(const VP8EncIterator* const it); 287 // export decimated samples 288 void VP8IteratorExport(const VP8EncIterator* const it); 289 // go to next macroblock. Returns !done_. If *block_to_save is non-null, will 290 // save the boundary values to top_/left_ arrays. block_to_save can be 291 // it->yuv_out_ or it->yuv_in_. 292 int VP8IteratorNext(VP8EncIterator* const it, 293 const uint8_t* const block_to_save); 294 // Report progression based on macroblock rows. Return 0 for user-abort request. 295 int VP8IteratorProgress(const VP8EncIterator* const it, 296 int final_delta_percent); 297 // Intra4x4 iterations 298 void VP8IteratorStartI4(VP8EncIterator* const it); 299 // returns true if not done. 300 int VP8IteratorRotateI4(VP8EncIterator* const it, 301 const uint8_t* const yuv_out); 302 303 // Non-zero context setup/teardown 304 void VP8IteratorNzToBytes(VP8EncIterator* const it); 305 void VP8IteratorBytesToNz(VP8EncIterator* const it); 306 307 // Helper functions to set mode properties 308 void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode); 309 void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes); 310 void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode); 311 void VP8SetSkip(const VP8EncIterator* const it, int skip); 312 void VP8SetSegment(const VP8EncIterator* const it, int segment); 313 314 //------------------------------------------------------------------------------ 315 // Paginated token buffer 316 317 // WIP: #define USE_TOKEN_BUFFER 318 319 #ifdef USE_TOKEN_BUFFER 320 321 #define MAX_NUM_TOKEN 2048 322 323 typedef struct VP8Tokens VP8Tokens; 324 struct VP8Tokens { 325 uint16_t tokens_[MAX_NUM_TOKEN]; // bit#15: bit, bits 0..14: slot 326 int left_; 327 VP8Tokens* next_; 328 }; 329 330 typedef struct { 331 VP8Tokens* rows_; 332 uint16_t* tokens_; // set to (*last_)->tokens_ 333 VP8Tokens** last_; 334 int left_; 335 int error_; // true in case of malloc error 336 } VP8TBuffer; 337 338 void VP8TBufferInit(VP8TBuffer* const b); // initialize an empty buffer 339 int VP8TBufferNewPage(VP8TBuffer* const b); // allocate a new page 340 void VP8TBufferClear(VP8TBuffer* const b); // de-allocate memory 341 342 int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw, 343 const uint8_t* const probas); 344 345 static WEBP_INLINE int VP8AddToken(VP8TBuffer* const b, 346 int bit, int proba_idx) { 347 if (b->left_ > 0 || VP8TBufferNewPage(b)) { 348 const int slot = --b->left_; 349 b->tokens_[slot] = (bit << 15) | proba_idx; 350 } 351 return bit; 352 } 353 354 #endif // USE_TOKEN_BUFFER 355 356 //------------------------------------------------------------------------------ 357 // VP8Encoder 358 359 struct VP8Encoder { 360 const WebPConfig* config_; // user configuration and parameters 361 WebPPicture* pic_; // input / output picture 362 363 // headers 364 VP8FilterHeader filter_hdr_; // filtering information 365 VP8SegmentHeader segment_hdr_; // segment information 366 367 int profile_; // VP8's profile, deduced from Config. 368 369 // dimension, in macroblock units. 370 int mb_w_, mb_h_; 371 int preds_w_; // stride of the *preds_ prediction plane (=4*mb_w + 1) 372 373 // number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS) 374 int num_parts_; 375 376 // per-partition boolean decoders. 377 VP8BitWriter bw_; // part0 378 VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions 379 380 int percent_; // for progress 381 382 // transparency blob 383 int has_alpha_; 384 uint8_t* alpha_data_; // non-NULL if transparency is present 385 uint32_t alpha_data_size_; 386 387 // enhancement layer 388 int use_layer_; 389 VP8BitWriter layer_bw_; 390 uint8_t* layer_data_; 391 size_t layer_data_size_; 392 393 // quantization info (one set of DC/AC dequant factor per segment) 394 VP8SegmentInfo dqm_[NUM_MB_SEGMENTS]; 395 int base_quant_; // nominal quantizer value. Only used 396 // for relative coding of segments' quant. 397 int uv_alpha_; // U/V quantization susceptibility 398 // global offset of quantizers, shared by all segments 399 int dq_y1_dc_; 400 int dq_y2_dc_, dq_y2_ac_; 401 int dq_uv_dc_, dq_uv_ac_; 402 403 // probabilities and statistics 404 VP8Proba proba_; 405 uint64_t sse_[4]; // sum of Y/U/V/A squared errors for all macroblocks 406 uint64_t sse_count_; // pixel count for the sse_[] stats 407 int coded_size_; 408 int residual_bytes_[3][4]; 409 int block_count_[3]; 410 411 // quality/speed settings 412 int method_; // 0=fastest, 6=best/slowest. 413 int rd_opt_level_; // Deduced from method_. 414 int max_i4_header_bits_; // partition #0 safeness factor 415 416 // Memory 417 VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1) 418 uint8_t* preds_; // predictions modes: (4*mb_w+1) * (4*mb_h+1) 419 uint32_t* nz_; // non-zero bit context: mb_w+1 420 uint8_t* yuv_in_; // input samples 421 uint8_t* yuv_out_; // output samples 422 uint8_t* yuv_out2_; // secondary scratch out-buffer. swapped with yuv_out_. 423 uint8_t* yuv_p_; // scratch buffer for prediction 424 uint8_t *y_top_; // top luma samples. 425 uint8_t *uv_top_; // top u/v samples. 426 // U and V are packed into 16 pixels (8 U + 8 V) 427 uint8_t *y_left_; // left luma samples (adressable from index -1 to 15). 428 uint8_t *u_left_; // left u samples (adressable from index -1 to 7) 429 uint8_t *v_left_; // left v samples (adressable from index -1 to 7) 430 431 LFStats *lf_stats_; // autofilter stats (if NULL, autofilter is off) 432 }; 433 434 //------------------------------------------------------------------------------ 435 // internal functions. Not public. 436 437 // in tree.c 438 extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS]; 439 extern const uint8_t 440 VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS]; 441 // Reset the token probabilities to their initial (default) values 442 void VP8DefaultProbas(VP8Encoder* const enc); 443 // Write the token probabilities 444 void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas); 445 // Writes the partition #0 modes (that is: all intra modes) 446 void VP8CodeIntraModes(VP8Encoder* const enc); 447 448 // in syntax.c 449 // Generates the final bitstream by coding the partition0 and headers, 450 // and appending an assembly of all the pre-coded token partitions. 451 // Return true if everything is ok. 452 int VP8EncWrite(VP8Encoder* const enc); 453 // Release memory allocated for bit-writing in VP8EncLoop & seq. 454 void VP8EncFreeBitWriters(VP8Encoder* const enc); 455 456 // in frame.c 457 extern const uint8_t VP8EncBands[16 + 1]; 458 // Form all the four Intra16x16 predictions in the yuv_p_ cache 459 void VP8MakeLuma16Preds(const VP8EncIterator* const it); 460 // Form all the four Chroma8x8 predictions in the yuv_p_ cache 461 void VP8MakeChroma8Preds(const VP8EncIterator* const it); 462 // Form all the ten Intra4x4 predictions in the yuv_p_ cache 463 // for the 4x4 block it->i4_ 464 void VP8MakeIntra4Preds(const VP8EncIterator* const it); 465 // Rate calculation 466 int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd); 467 int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]); 468 int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd); 469 // Main stat / coding passes 470 int VP8EncLoop(VP8Encoder* const enc); 471 int VP8StatLoop(VP8Encoder* const enc); 472 473 // in webpenc.c 474 // Assign an error code to a picture. Return false for convenience. 475 int WebPEncodingSetError(const WebPPicture* const pic, WebPEncodingError error); 476 int WebPReportProgress(const WebPPicture* const pic, 477 int percent, int* const percent_store); 478 479 // in analysis.c 480 // Main analysis loop. Decides the segmentations and complexity. 481 // Assigns a first guess for Intra16 and uvmode_ prediction modes. 482 int VP8EncAnalyze(VP8Encoder* const enc); 483 484 // in quant.c 485 // Sets up segment's quantization values, base_quant_ and filter strengths. 486 void VP8SetSegmentParams(VP8Encoder* const enc, float quality); 487 // Pick best modes and fills the levels. Returns true if skipped. 488 int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt); 489 490 // in alpha.c 491 void VP8EncInitAlpha(VP8Encoder* const enc); // initialize alpha compression 492 int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data 493 void VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data 494 495 // in layer.c 496 void VP8EncInitLayer(VP8Encoder* const enc); // init everything 497 void VP8EncCodeLayerBlock(VP8EncIterator* it); // code one more macroblock 498 int VP8EncFinishLayer(VP8Encoder* const enc); // finalize coding 499 void VP8EncDeleteLayer(VP8Encoder* enc); // reclaim memory 500 501 // in filter.c 502 503 // SSIM utils 504 typedef struct { 505 double w, xm, ym, xxm, xym, yym; 506 } DistoStats; 507 void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst); 508 void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1, 509 const uint8_t* src2, int stride2, 510 int W, int H, DistoStats* const stats); 511 double VP8SSIMGet(const DistoStats* const stats); 512 double VP8SSIMGetSquaredError(const DistoStats* const stats); 513 514 // autofilter 515 void VP8InitFilter(VP8EncIterator* const it); 516 void VP8StoreFilterStats(VP8EncIterator* const it); 517 void VP8AdjustFilterStrength(VP8EncIterator* const it); 518 519 //------------------------------------------------------------------------------ 520 521 #if defined(__cplusplus) || defined(c_plusplus) 522 } // extern "C" 523 #endif 524 525 #endif /* WEBP_ENC_VP8ENCI_H_ */ 526