1 // Copyright 2011 Google Inc. 2 // 3 // This code is licensed under the same terms as WebM: 4 // Software License Agreement: http://www.webmproject.org/license/software/ 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ 6 // ----------------------------------------------------------------------------- 7 // 8 // WebP encoder: internal header. 9 // 10 // Author: Skal (pascal.massimino (at) gmail.com) 11 12 #ifndef WEBP_ENC_VP8ENCI_H_ 13 #define WEBP_ENC_VP8ENCI_H_ 14 15 #include "string.h" // for memcpy() 16 #include "webp/encode.h" 17 #include "bit_writer.h" 18 19 #if defined(__cplusplus) || defined(c_plusplus) 20 extern "C" { 21 #endif 22 23 //----------------------------------------------------------------------------- 24 // Various defines and enums 25 26 // version numbers 27 #define ENC_MAJ_VERSION 0 28 #define ENC_MIN_VERSION 1 29 #define ENC_REV_VERSION 2 30 31 // size of histogram used by CollectHistogram. 32 #define MAX_COEFF_THRESH 64 33 34 // intra prediction modes 35 enum { B_DC_PRED = 0, // 4x4 modes 36 B_TM_PRED = 1, 37 B_VE_PRED = 2, 38 B_HE_PRED = 3, 39 B_RD_PRED = 4, 40 B_VR_PRED = 5, 41 B_LD_PRED = 6, 42 B_VL_PRED = 7, 43 B_HD_PRED = 8, 44 B_HU_PRED = 9, 45 NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED, // = 10 46 47 // Luma16 or UV modes 48 DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED, 49 H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED 50 }; 51 52 enum { NUM_MB_SEGMENTS = 4, 53 MAX_NUM_PARTITIONS = 8, 54 NUM_TYPES = 4, // 0: i16-AC, 1: i16-DC, 2:chroma-AC, 3:i4-AC 55 NUM_BANDS = 8, 56 NUM_CTX = 3, 57 NUM_PROBAS = 11, 58 MAX_LF_LEVELS = 64, // Maximum loop filter level 59 MAX_VARIABLE_LEVEL = 67 // last (inclusive) level with variable cost 60 }; 61 62 // YUV-cache parameters. Cache is 16-pixels wide. 63 // The original or reconstructed samples can be accessed using VP8Scan[] 64 // The predicted blocks can be accessed using offsets to yuv_p_ and 65 // the arrays VP8*ModeOffsets[]; 66 // +----+ YUV Samples area. 
See VP8Scan[] for accessing the blocks. 67 // Y_OFF |YYYY| <- original samples (enc->yuv_in_) 68 // |YYYY| 69 // |YYYY| 70 // |YYYY| 71 // U_OFF |UUVV| V_OFF (=U_OFF + 8) 72 // |UUVV| 73 // +----+ 74 // Y_OFF |YYYY| <- compressed/decoded samples ('yuv_out_') 75 // |YYYY| There are two buffers like this ('yuv_out_'/'yuv_out2_') 76 // |YYYY| 77 // |YYYY| 78 // U_OFF |UUVV| V_OFF 79 // |UUVV| 80 // x2 (for yuv_out2_) 81 // +----+ Prediction area ('yuv_p_', size = PRED_SIZE) 82 // I16DC16 |YYYY| Intra16 predictions (16x16 block each) 83 // |YYYY| 84 // |YYYY| 85 // |YYYY| 86 // I16TM16 |YYYY| 87 // |YYYY| 88 // |YYYY| 89 // |YYYY| 90 // I16VE16 |YYYY| 91 // |YYYY| 92 // |YYYY| 93 // |YYYY| 94 // I16HE16 |YYYY| 95 // |YYYY| 96 // |YYYY| 97 // |YYYY| 98 // +----+ Chroma U/V predictions (16x8 block each) 99 // C8DC8 |UUVV| 100 // |UUVV| 101 // C8TM8 |UUVV| 102 // |UUVV| 103 // C8VE8 |UUVV| 104 // |UUVV| 105 // C8HE8 |UUVV| 106 // |UUVV| 107 // +----+ Intra 4x4 predictions (4x4 block each) 108 // |YYYY| I4DC4 I4TM4 I4VE4 I4HE4 109 // |YYYY| I4RD4 I4VR4 I4LD4 I4VL4 110 // |YY..| I4HD4 I4HU4 I4TMP 111 // +----+ 112 #define BPS 16 // this is the common stride 113 #define Y_SIZE (BPS * 16) 114 #define UV_SIZE (BPS * 8) 115 #define YUV_SIZE (Y_SIZE + UV_SIZE) 116 #define PRED_SIZE (6 * 16 * BPS + 12 * BPS) 117 #define Y_OFF (0) 118 #define U_OFF (Y_SIZE) 119 #define V_OFF (U_OFF + 8) 120 #define ALIGN_CST 15 121 #define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST) 122 123 extern const int VP8Scan[16 + 4 + 4]; // in quant.c 124 extern const int VP8UVModeOffsets[4]; // in analyze.c 125 extern const int VP8I16ModeOffsets[4]; 126 extern const int VP8I4ModeOffsets[NUM_BMODES]; 127 128 // Layout of prediction blocks 129 // intra 16x16 130 #define I16DC16 (0 * 16 * BPS) 131 #define I16TM16 (1 * 16 * BPS) 132 #define I16VE16 (2 * 16 * BPS) 133 #define I16HE16 (3 * 16 * BPS) 134 // chroma 8x8, two U/V blocks side by side (hence: 16x8 each) 135 #define C8DC8 (4 * 16 * BPS) 
136 #define C8TM8 (4 * 16 * BPS + 8 * BPS) 137 #define C8VE8 (5 * 16 * BPS) 138 #define C8HE8 (5 * 16 * BPS + 8 * BPS) 139 // intra 4x4 140 #define I4DC4 (6 * 16 * BPS + 0) 141 #define I4TM4 (6 * 16 * BPS + 4) 142 #define I4VE4 (6 * 16 * BPS + 8) 143 #define I4HE4 (6 * 16 * BPS + 12) 144 #define I4RD4 (6 * 16 * BPS + 4 * BPS + 0) 145 #define I4VR4 (6 * 16 * BPS + 4 * BPS + 4) 146 #define I4LD4 (6 * 16 * BPS + 4 * BPS + 8) 147 #define I4VL4 (6 * 16 * BPS + 4 * BPS + 12) 148 #define I4HD4 (6 * 16 * BPS + 8 * BPS + 0) 149 #define I4HU4 (6 * 16 * BPS + 8 * BPS + 4) 150 #define I4TMP (6 * 16 * BPS + 8 * BPS + 8) 151 152 typedef int64_t score_t; // type used for scores, rate, distortion 153 #define MAX_COST ((score_t)0x7fffffffffffffLL) 154 155 #define QFIX 17 156 #define BIAS(b) ((b) << (QFIX - 8)) 157 // Fun fact: this is the _only_ line where we're actually being lossy and 158 // discarding bits. 159 static inline int QUANTDIV(int n, int iQ, int B) { 160 return (n * iQ + B) >> QFIX; 161 } 162 extern const uint8_t VP8Zigzag[16]; 163 164 //----------------------------------------------------------------------------- 165 // Headers 166 167 typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS]; 168 typedef uint64_t StatsArray[NUM_CTX][NUM_PROBAS][2]; 169 typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1]; 170 typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats 171 172 typedef struct VP8Encoder VP8Encoder; 173 174 // segment features 175 typedef struct { 176 int num_segments_; // Actual number of segments. 1 segment only = unused. 177 int update_map_; // whether to update the segment map or not. 178 // must be 0 if there's only 1 segment. 179 int size_; // bit-cost for transmitting the segment map 180 } VP8SegmentHeader; 181 182 // Struct collecting all frame-persistent probabilities. 183 typedef struct { 184 uint8_t segments_[3]; // probabilities for segment tree 185 uint8_t skip_proba_; // final probability of being skipped. 
186 ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 924 bytes 187 StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 7.4k 188 CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 11.4k 189 int use_skip_proba_; // Note: we always use skip_proba for now. 190 int nb_skip_; // number of skipped blocks 191 } VP8Proba; 192 193 // Filter parameters. Not actually used in the code (we don't perform 194 // the in-loop filtering), but filled from user's config 195 typedef struct { 196 int simple_; // filtering type: 0=complex, 1=simple 197 int level_; // base filter level [0..63] 198 int sharpness_; // [0..7] 199 int i4x4_lf_delta_; // delta filter level for i4x4 relative to i16x16 200 } VP8FilterHeader; 201 202 //----------------------------------------------------------------------------- 203 // Informations about the macroblocks. 204 205 typedef struct { 206 // block type 207 uint8_t type_:2; // 0=i4x4, 1=i16x16 208 uint8_t uv_mode_:2; 209 uint8_t skip_:1; 210 uint8_t segment_:2; 211 uint8_t alpha_; // quantization-susceptibility 212 } VP8MBInfo; 213 214 typedef struct { 215 uint16_t q_[16]; // quantizer steps 216 uint16_t iq_[16]; // reciprocals, fixed point. 217 uint16_t bias_[16]; // rounding bias 218 uint16_t zthresh_[16]; // value under which a coefficient is zeroed 219 uint16_t sharpen_[16]; // frequency boosters for slight sharpening 220 } VP8Matrix; 221 222 typedef struct { 223 VP8Matrix y1_, y2_, uv_; // quantization matrices 224 int alpha_; // quant-susceptibility, range [-127,127]. Zero is neutral. 225 // Lower values indicate a lower risk of blurriness. 226 int beta_; // filter-susceptibility, range [0,255]. 227 int quant_; // final segment quantizer. 
228 int fstrength_; // final in-loop filtering strength 229 // reactivities 230 int lambda_i16_, lambda_i4_, lambda_uv_; 231 int lambda_mode_, lambda_trellis_, tlambda_; 232 int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_; 233 } VP8SegmentInfo; 234 235 // Handy transcient struct to accumulate score and info during RD-optimization 236 // and mode evaluation. 237 typedef struct { 238 score_t D, SD, R, score; // Distortion, spectral distortion, rate, score. 239 int16_t y_dc_levels[16]; // Quantized levels for luma-DC, luma-AC, chroma. 240 int16_t y_ac_levels[16][16]; 241 int16_t uv_levels[4 + 4][16]; 242 int mode_i16; // mode number for intra16 prediction 243 int modes_i4[16]; // mode numbers for intra4 predictions 244 int mode_uv; // mode number of chroma prediction 245 uint32_t nz; // non-zero blocks 246 } VP8ModeScore; 247 248 // Iterator structure to iterate through macroblocks, pointing to the 249 // right neighbouring data (samples, predictions, contexts, ...) 250 typedef struct { 251 int x_, y_; // current macroblock 252 int y_offset_, uv_offset_; // offset to the luma / chroma planes 253 int y_stride_, uv_stride_; // respective strides 254 uint8_t* yuv_in_; // borrowed from enc_ (for now) 255 uint8_t* yuv_out_; // '' 256 uint8_t* yuv_out2_; // '' 257 uint8_t* yuv_p_; // '' 258 VP8Encoder* enc_; // back-pointer 259 VP8MBInfo* mb_; // current macroblock 260 VP8BitWriter* bw_; // current bit-writer 261 uint8_t* preds_; // intra mode predictors (4x4 blocks) 262 uint32_t* nz_; // non-zero pattern 263 uint8_t i4_boundary_[37]; // 32+5 boundary samples needed by intra4x4 264 uint8_t* i4_top_; // pointer to the current top boundary sample 265 int i4_; // current intra4x4 mode being tested 266 int top_nz_[9]; // top-non-zero context. 267 int left_nz_[9]; // left-non-zero. left_nz[8] is independent. 268 uint64_t bit_count_[4][3]; // bit counters for coded levels. 
269 uint64_t luma_bits_; // macroblock bit-cost for luma 270 uint64_t uv_bits_; // macroblock bit-cost for chroma 271 LFStats* lf_stats_; // filter stats (borrowed from enc_) 272 int do_trellis_; // if true, perform extra level optimisation 273 int done_; // true when scan is finished 274 } VP8EncIterator; 275 276 // in iterator.c 277 // must be called first. 278 void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it); 279 // restart a scan. 280 void VP8IteratorReset(VP8EncIterator* const it); 281 // import samples from source 282 void VP8IteratorImport(const VP8EncIterator* const it); 283 // export decimated samples 284 void VP8IteratorExport(const VP8EncIterator* const it); 285 // go to next macroblock. Returns !done_. If *block_to_save is non-null, will 286 // save the boundary values to top_/left_ arrays. block_to_save can be 287 // it->yuv_out_ or it->yuv_in_. 288 int VP8IteratorNext(VP8EncIterator* const it, 289 const uint8_t* const block_to_save); 290 // Intra4x4 iterations 291 void VP8IteratorStartI4(VP8EncIterator* const it); 292 // returns true if not done. 
293 int VP8IteratorRotateI4(VP8EncIterator* const it, 294 const uint8_t* const yuv_out); 295 296 // Non-zero context setup/teardown 297 void VP8IteratorNzToBytes(VP8EncIterator* const it); 298 void VP8IteratorBytesToNz(VP8EncIterator* const it); 299 300 // Helper functions to set mode properties 301 void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode); 302 void VP8SetIntra4Mode(const VP8EncIterator* const it, int modes[16]); 303 void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode); 304 void VP8SetSkip(const VP8EncIterator* const it, int skip); 305 void VP8SetSegment(const VP8EncIterator* const it, int segment); 306 void VP8IteratorResetCosts(VP8EncIterator* const it); 307 308 //----------------------------------------------------------------------------- 309 // VP8Encoder 310 311 struct VP8Encoder { 312 const WebPConfig* config_; // user configuration and parameters 313 WebPPicture* pic_; // input / output picture 314 315 // headers 316 VP8FilterHeader filter_hdr_; // filtering information 317 VP8SegmentHeader segment_hdr_; // segment information 318 319 int profile_; // VP8's profile, deduced from Config. 320 321 // dimension, in macroblock units. 322 int mb_w_, mb_h_; 323 int preds_w_; // stride of the *preds_ prediction plane (=4*mb_w + 1) 324 325 // number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS) 326 int num_parts_; 327 328 // per-partition boolean decoders. 329 VP8BitWriter bw_; // part0 330 VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions 331 332 // transparency blob 333 int has_alpha_; 334 uint8_t* alpha_data_; // non-NULL if transparency is present 335 size_t alpha_data_size_; 336 337 // enhancement layer 338 int use_layer_; 339 VP8BitWriter layer_bw_; 340 uint8_t* layer_data_; 341 size_t layer_data_size_; 342 343 // quantization info (one set of DC/AC dequant factor per segment) 344 VP8SegmentInfo dqm_[NUM_MB_SEGMENTS]; 345 int base_quant_; // nominal quantizer value. 
Only used 346 // for relative coding of segments' quant. 347 int uv_alpha_; // U/V quantization susceptibility 348 // global offset of quantizers, shared by all segments 349 int dq_y1_dc_; 350 int dq_y2_dc_, dq_y2_ac_; 351 int dq_uv_dc_, dq_uv_ac_; 352 353 // probabilities and statistics 354 VP8Proba proba_; 355 uint64_t sse_[3]; // sum of Y/U/V squared errors for all macroblocks 356 uint64_t sse_count_; // pixel count for the sse_[] stats 357 int coded_size_; 358 int residual_bytes_[3][4]; 359 int block_count_[3]; 360 361 // quality/speed settings 362 int method_; // 0=fastest, 6=best/slowest. 363 int rd_opt_level_; // Deduced from method_. 364 365 // Memory 366 VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1) 367 uint8_t* preds_; // predictions modes: (4*mb_w+1) * (4*mb_h+1) 368 uint32_t* nz_; // non-zero bit context: mb_w+1 369 uint8_t* yuv_in_; // input samples 370 uint8_t* yuv_out_; // output samples 371 uint8_t* yuv_out2_; // secondary scratch out-buffer. swapped with yuv_out_. 372 uint8_t* yuv_p_; // scratch buffer for prediction 373 uint8_t *y_top_; // top luma samples. 374 uint8_t *uv_top_; // top u/v samples. 375 // U and V are packed into 16 pixels (8 U + 8 V) 376 uint8_t *y_left_; // left luma samples (adressable from index -1 to 15). 377 uint8_t *u_left_; // left u samples (adressable from index -1 to 7) 378 uint8_t *v_left_; // left v samples (adressable from index -1 to 7) 379 380 LFStats *lf_stats_; // autofilter stats (if NULL, autofilter is off) 381 }; 382 383 //----------------------------------------------------------------------------- 384 // internal functions. Not public. 

  // in tree.c
// Coefficient probability tables, indexed [type][band][context][proba].
extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
extern const uint8_t
    VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
// Reset the token probabilities to their initial (default) values
void VP8DefaultProbas(VP8Encoder* const enc);
// Write the token probabilities
void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas);
// Writes the partition #0 modes (that is: all intra modes)
void VP8CodeIntraModes(VP8Encoder* const enc);

  // in syntax.c
// Generates the final bitstream by coding the partition0 and headers,
// and appending an assembly of all the pre-coded token partitions.
// Return true if everything is ok.
int VP8EncWrite(VP8Encoder* const enc);

  // in frame.c
extern const uint8_t VP8EncBands[16 + 1];
// Form all the four Intra16x16 predictions in the yuv_p_ cache
void VP8MakeLuma16Preds(const VP8EncIterator* const it);
// Form all the four Chroma8x8 predictions in the yuv_p_ cache
void VP8MakeChroma8Preds(const VP8EncIterator* const it);
// Form all the ten Intra4x4 predictions in the yuv_p_ cache
// for the 4x4 block it->i4_
void VP8MakeIntra4Preds(const VP8EncIterator* const it);
// Rate calculation
int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
// Main stat / coding passes
// NOTE(review): the meaning of the int return values is not stated here;
// confirm against the definitions in frame.c.
int VP8EncLoop(VP8Encoder* const enc);
int VP8StatLoop(VP8Encoder* const enc);

  // in webpenc.c
// Assign an error code to a picture. Return false for convenience.
int WebPEncodingSetError(WebPPicture* const pic, WebPEncodingError error);

  // in analysis.c
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
typedef int (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
                         int start_block, int end_block);
extern VP8CHisto VP8CollectHistogram;
// Main analysis loop. Decides the segmentations and complexity.
// Assigns a first guess for Intra16 and uvmode_ prediction modes.
int VP8EncAnalyze(VP8Encoder* const enc);

  // in quant.c
// Sets up segment's quantization values, base_quant_ and filter strengths.
void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
// Pick best modes and fills the levels. Returns true if skipped.
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt);

  // in alpha.c
void VP8EncInitAlpha(VP8Encoder* enc);           // initialize alpha compression
void VP8EncCodeAlphaBlock(VP8EncIterator* it);   // analyze or code a macroblock
int VP8EncFinishAlpha(VP8Encoder* enc);          // finalize compressed data
void VP8EncDeleteAlpha(VP8Encoder* enc);         // delete compressed data

  // in layer.c
void VP8EncInitLayer(VP8Encoder* const enc);     // init everything
void VP8EncCodeLayerBlock(VP8EncIterator* it);   // code one more macroblock
int VP8EncFinishLayer(VP8Encoder* const enc);    // finalize coding
void VP8EncDeleteLayer(VP8Encoder* enc);         // reclaim memory

  // in dsp.c
// 'histo' is declared with MAX_COEFF_THRESH + 1 entries.
int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]);

// Transforms
// VP8Idct: Does one or two inverse transforms. If do_two is set, the transforms
//          will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst,
                        int do_two);
typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
extern VP8Idct VP8ITransform;
extern VP8Fdct VP8FTransform;
extern VP8WHT VP8ITransformWHT;
extern VP8WHT VP8FTransformWHT;
// Predictions
// *dst is the destination block. *top, *top_right and *left can be NULL.
// NOTE(review): neither signature below has a 'top_right' parameter; the
// mention above may be stale -- confirm against the implementations.
typedef void (*VP8IntraPreds)(uint8_t *dst, const uint8_t* left,
                              const uint8_t* top);
typedef void (*VP8Intra4Preds)(uint8_t *dst, const uint8_t* top);
extern VP8Intra4Preds VP8EncPredLuma4;
extern VP8IntraPreds VP8EncPredLuma16;
extern VP8IntraPreds VP8EncPredChroma8;

// Distortion metrics between two blocks of pixels ('pix' and 'ref').
typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4;
// Same, with an additional table of weights.
typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref,
                          const uint16_t* const weights);
extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;

// Block copy from 'src' to 'dst'.
typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
extern VP8BlockCopy VP8Copy4x4;
extern VP8BlockCopy VP8Copy8x8;
extern VP8BlockCopy VP8Copy16x16;
// Quantization
typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
                                int n, const VP8Matrix* const mtx);
extern VP8QuantizeBlock VP8EncQuantizeBlock;

// CPU features that can be queried through VP8EncGetCPUInfo.
typedef enum {
  kSSE2,
  kSSE3
} CPUFeature;
// returns true if the CPU supports the feature.
typedef int (*VP8CPUInfo)(CPUFeature feature);
extern VP8CPUInfo VP8EncGetCPUInfo;

void VP8EncDspInit(void);   // must be called before using any of the above

  // in filter.c
extern void VP8InitFilter(VP8EncIterator* const it);
extern void VP8StoreFilterStats(VP8EncIterator* const it);
extern void VP8AdjustFilterStrength(VP8EncIterator* const it);

//-----------------------------------------------------------------------------

#if defined(__cplusplus) || defined(c_plusplus)
}    // extern "C"
#endif

#endif  // WEBP_ENC_VP8ENCI_H_