1 // Copyright 2011 Google Inc. 2 // 3 // This code is licensed under the same terms as WebM: 4 // Software License Agreement: http://www.webmproject.org/license/software/ 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ 6 // ----------------------------------------------------------------------------- 7 // 8 // VP8Iterator: block iterator 9 // 10 // Author: Skal (pascal.massimino (at) gmail.com) 11 12 #include <stdlib.h> 13 #include <string.h> 14 #include "vp8enci.h" 15 16 #if defined(__cplusplus) || defined(c_plusplus) 17 extern "C" { 18 #endif 19 20 //----------------------------------------------------------------------------- 21 // VP8Iterator 22 //----------------------------------------------------------------------------- 23 24 static void InitLeft(VP8EncIterator* const it) { 25 const VP8Encoder* const enc = it->enc_; 26 enc->y_left_[-1] = enc->u_left_[-1] = enc->v_left_[-1] = 27 (it->y_) > 0 ? 129 : 127; 28 memset(enc->y_left_, 129, 16); 29 memset(enc->u_left_, 129, 8); 30 memset(enc->v_left_, 129, 8); 31 it->left_nz_[8] = 0; 32 } 33 34 static void InitTop(VP8EncIterator* const it) { 35 const VP8Encoder* const enc = it->enc_; 36 const int top_size = enc->mb_w_ * 16; 37 memset(enc->y_top_, 127, 2 * top_size); 38 memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_)); 39 } 40 41 void VP8IteratorReset(VP8EncIterator* const it) { 42 VP8Encoder* const enc = it->enc_; 43 it->x_ = 0; 44 it->y_ = 0; 45 it->y_offset_ = 0; 46 it->uv_offset_ = 0; 47 it->mb_ = enc->mb_info_; 48 it->preds_ = enc->preds_; 49 it->nz_ = enc->nz_; 50 it->bw_ = &enc->parts_[0]; 51 it->done_ = enc->mb_w_* enc->mb_h_; 52 InitTop(it); 53 InitLeft(it); 54 memset(it->bit_count_, 0, sizeof(it->bit_count_)); 55 it->do_trellis_ = 0; 56 } 57 58 void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) { 59 it->enc_ = enc; 60 it->y_stride_ = enc->pic_->y_stride; 61 it->uv_stride_ = enc->pic_->uv_stride; 62 // TODO(later): for multithreading, these should be 
owned by 'it'. 63 it->yuv_in_ = enc->yuv_in_; 64 it->yuv_out_ = enc->yuv_out_; 65 it->yuv_out2_ = enc->yuv_out2_; 66 it->yuv_p_ = enc->yuv_p_; 67 it->lf_stats_ = enc->lf_stats_; 68 VP8IteratorReset(it); 69 } 70 71 //----------------------------------------------------------------------------- 72 // Import the source samples into the cache. Takes care of replicating 73 // boundary pixels if necessary. 74 75 void VP8IteratorImport(const VP8EncIterator* const it) { 76 const VP8Encoder* const enc = it->enc_; 77 const int x = it->x_, y = it->y_; 78 const WebPPicture* const pic = enc->pic_; 79 const uint8_t* ysrc = pic->y + (y * pic->y_stride + x) * 16; 80 const uint8_t* usrc = pic->u + (y * pic->uv_stride + x) * 8; 81 const uint8_t* vsrc = pic->v + (y * pic->uv_stride + x) * 8; 82 uint8_t* ydst = it->yuv_in_ + Y_OFF; 83 uint8_t* udst = it->yuv_in_ + U_OFF; 84 uint8_t* vdst = it->yuv_in_ + V_OFF; 85 int w = (pic->width - x * 16); 86 int h = (pic->height - y * 16); 87 int i; 88 89 if (w > 16) w = 16; 90 if (h > 16) h = 16; 91 // Luma plane 92 for (i = 0; i < h; ++i) { 93 memcpy(ydst, ysrc, w); 94 if (w < 16) memset(ydst + w, ydst[w - 1], 16 - w); 95 ydst += BPS; 96 ysrc += pic->y_stride; 97 } 98 for (i = h; i < 16; ++i) { 99 memcpy(ydst, ydst - BPS, 16); 100 ydst += BPS; 101 } 102 // U/V plane 103 w = (w + 1) / 2; 104 h = (h + 1) / 2; 105 for (i = 0; i < h; ++i) { 106 memcpy(udst, usrc, w); 107 memcpy(vdst, vsrc, w); 108 if (w < 8) { 109 memset(udst + w, udst[w - 1], 8 - w); 110 memset(vdst + w, vdst[w - 1], 8 - w); 111 } 112 udst += BPS; 113 vdst += BPS; 114 usrc += pic->uv_stride; 115 vsrc += pic->uv_stride; 116 } 117 for (i = h; i < 8; ++i) { 118 memcpy(udst, udst - BPS, 8); 119 memcpy(vdst, vdst - BPS, 8); 120 udst += BPS; 121 vdst += BPS; 122 } 123 } 124 125 //----------------------------------------------------------------------------- 126 // Copy back the compressed samples into user space if requested. 
127 128 void VP8IteratorExport(const VP8EncIterator* const it) { 129 const VP8Encoder* const enc = it->enc_; 130 if (enc->config_->show_compressed) { 131 const int x = it->x_, y = it->y_; 132 const uint8_t* const ysrc = it->yuv_out_ + Y_OFF; 133 const uint8_t* const usrc = it->yuv_out_ + U_OFF; 134 const uint8_t* const vsrc = it->yuv_out_ + V_OFF; 135 const WebPPicture* const pic = enc->pic_; 136 uint8_t* ydst = pic->y + (y * pic->y_stride + x) * 16; 137 uint8_t* udst = pic->u + (y * pic->uv_stride + x) * 8; 138 uint8_t* vdst = pic->v + (y * pic->uv_stride + x) * 8; 139 int w = (pic->width - x * 16); 140 int h = (pic->height - y * 16); 141 int i; 142 143 if (w > 16) w = 16; 144 if (h > 16) h = 16; 145 146 // Luma plane 147 for (i = 0; i < h; ++i) { 148 memcpy(ydst + i * pic->y_stride, ysrc + i * BPS, w); 149 } 150 // U/V plane 151 { 152 const int uv_w = (w + 1) / 2; 153 const int uv_h = (h + 1) / 2; 154 for (i = 0; i < uv_h; ++i) { 155 memcpy(udst + i * pic->uv_stride, usrc + i * BPS, uv_w); 156 memcpy(vdst + i * pic->uv_stride, vsrc + i * BPS, uv_w); 157 } 158 } 159 } 160 } 161 162 //----------------------------------------------------------------------------- 163 // Non-zero contexts setup/teardown 164 165 // Nz bits: 166 // 0 1 2 3 Y 167 // 4 5 6 7 168 // 8 9 10 11 169 // 12 13 14 15 170 // 16 17 U 171 // 18 19 172 // 20 21 V 173 // 22 23 174 // 24 DC-intra16 175 176 // Convert packed context to byte array 177 #define BIT(nz, n) (!!((nz) & (1 << (n)))) 178 179 void VP8IteratorNzToBytes(VP8EncIterator* const it) { 180 const int tnz = it->nz_[0], lnz = it->nz_[-1]; 181 182 // Top-Y 183 it->top_nz_[0] = BIT(tnz, 12); 184 it->top_nz_[1] = BIT(tnz, 13); 185 it->top_nz_[2] = BIT(tnz, 14); 186 it->top_nz_[3] = BIT(tnz, 15); 187 // Top-U 188 it->top_nz_[4] = BIT(tnz, 18); 189 it->top_nz_[5] = BIT(tnz, 19); 190 // Top-V 191 it->top_nz_[6] = BIT(tnz, 22); 192 it->top_nz_[7] = BIT(tnz, 23); 193 // DC 194 it->top_nz_[8] = BIT(tnz, 24); 195 196 // left-Y 197 it->left_nz_[0] 
= BIT(lnz, 3); 198 it->left_nz_[1] = BIT(lnz, 7); 199 it->left_nz_[2] = BIT(lnz, 11); 200 it->left_nz_[3] = BIT(lnz, 15); 201 // left-U 202 it->left_nz_[4] = BIT(lnz, 17); 203 it->left_nz_[5] = BIT(lnz, 19); 204 // left-V 205 it->left_nz_[6] = BIT(lnz, 21); 206 it->left_nz_[7] = BIT(lnz, 23); 207 // left-DC is special, iterated separately 208 } 209 210 void VP8IteratorBytesToNz(VP8EncIterator* const it) { 211 uint32_t nz = 0; 212 // top 213 nz |= (it->top_nz_[0] << 12) | (it->top_nz_[1] << 13); 214 nz |= (it->top_nz_[2] << 14) | (it->top_nz_[3] << 15); 215 nz |= (it->top_nz_[4] << 18) | (it->top_nz_[5] << 19); 216 nz |= (it->top_nz_[6] << 22) | (it->top_nz_[7] << 23); 217 nz |= (it->top_nz_[8] << 24); // we propagate the _top_ bit, esp. for intra4 218 // left 219 nz |= (it->left_nz_[0] << 3) | (it->left_nz_[1] << 7); 220 nz |= (it->left_nz_[2] << 11); 221 nz |= (it->left_nz_[4] << 17) | (it->left_nz_[6] << 21); 222 223 *it->nz_ = nz; 224 } 225 226 #undef BIT 227 228 //----------------------------------------------------------------------------- 229 // Advance to the next position, doing the bookeeping. 230 231 int VP8IteratorNext(VP8EncIterator* const it, 232 const uint8_t* const block_to_save) { 233 VP8Encoder* const enc = it->enc_; 234 if (block_to_save) { 235 const int x = it->x_, y = it->y_; 236 const uint8_t* const ysrc = block_to_save + Y_OFF; 237 const uint8_t* const usrc = block_to_save + U_OFF; 238 if (x < enc->mb_w_ - 1) { // left 239 int i; 240 for (i = 0; i < 16; ++i) { 241 enc->y_left_[i] = ysrc[15 + i * BPS]; 242 } 243 for (i = 0; i < 8; ++i) { 244 enc->u_left_[i] = usrc[7 + i * BPS]; 245 enc->v_left_[i] = usrc[15 + i * BPS]; 246 } 247 // top-left (before 'top'!) 
248 enc->y_left_[-1] = enc->y_top_[x * 16 + 15]; 249 enc->u_left_[-1] = enc->uv_top_[x * 16 + 0 + 7]; 250 enc->v_left_[-1] = enc->uv_top_[x * 16 + 8 + 7]; 251 } 252 if (y < enc->mb_h_ - 1) { // top 253 memcpy(enc->y_top_ + x * 16, ysrc + 15 * BPS, 16); 254 memcpy(enc->uv_top_ + x * 16, usrc + 7 * BPS, 8 + 8); 255 } 256 } 257 258 it->mb_++; 259 it->preds_ += 4; 260 it->nz_++; 261 it->x_++; 262 if (it->x_ == enc->mb_w_) { 263 it->x_ = 0; 264 it->y_++; 265 it->bw_ = &enc->parts_[it->y_ & (enc->num_parts_ - 1)]; 266 it->preds_ = enc->preds_ + it->y_ * 4 * enc->preds_w_; 267 it->nz_ = enc->nz_; 268 InitLeft(it); 269 } 270 return (0 < --it->done_); 271 } 272 273 //----------------------------------------------------------------------------- 274 // Helper function to set mode properties 275 276 void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode) { 277 int y; 278 uint8_t* preds = it->preds_; 279 for (y = 0; y < 4; ++y) { 280 memset(preds, mode, 4); 281 preds += it->enc_->preds_w_; 282 } 283 it->mb_->type_ = 1; 284 } 285 286 void VP8SetIntra4Mode(const VP8EncIterator* const it, int modes[16]) { 287 int x, y; 288 uint8_t* preds = it->preds_; 289 for (y = 0; y < 4; ++y) { 290 for (x = 0; x < 4; ++x) { 291 preds[x] = modes[x + y * 4]; 292 } 293 preds += it->enc_->preds_w_; 294 } 295 it->mb_->type_ = 0; 296 } 297 298 void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode) { 299 it->mb_->uv_mode_ = mode; 300 } 301 302 void VP8SetSkip(const VP8EncIterator* const it, int skip) { 303 it->mb_->skip_ = skip; 304 } 305 306 void VP8SetSegment(const VP8EncIterator* const it, int segment) { 307 it->mb_->segment_ = segment; 308 } 309 310 //----------------------------------------------------------------------------- 311 // Intra4x4 sub-blocks iteration 312 // 313 // We store and update the boundary samples into an array of 37 pixels. They 314 // are updated as we iterate and reconstructs each intra4x4 blocks in turn. 
// The position of the samples has the following snake pattern:
//
//    16|17 18 19 20|21 22 23 24|25 26 27 28|29 30 31 32|33 34 35 36  <- Top-right
//    --+-----------+-----------+-----------+-----------+
//    15|         19|         23|         27|         31|
//    14|         18|         22|         26|         30|
//    13|         17|         21|         25|         29|
//    12|13 14 15 16|17 18 19 20|21 22 23 24|25 26 27 28|
//    --+-----------+-----------+-----------+-----------+
//    11|         15|         19|         23|         27|
//    10|         14|         18|         22|         26|
//     9|         13|         17|         21|         25|
//     8| 9 10 11 12|13 14 15 16|17 18 19 20|21 22 23 24|
//    --+-----------+-----------+-----------+-----------+
//     7|         11|         15|         19|         23|
//     6|         10|         14|         18|         22|
//     5|          9|         13|         17|         21|
//     4| 5  6  7  8| 9 10 11 12|13 14 15 16|17 18 19 20|
//    --+-----------+-----------+-----------+-----------+
//     3|          7|         11|         15|         19|
//     2|          6|         10|         14|         18|
//     1|          5|          9|         13|         17|
//     0| 1  2  3  4| 5  6  7  8| 9 10 11 12|13 14 15 16|
//    --+-----------+-----------+-----------+-----------+

// Array to record the position of the top sample to pass to the prediction
// functions in dsp.c.
342 static const uint8_t VP8TopLeftI4[16] = { 343 17, 21, 25, 29, 344 13, 17, 21, 25, 345 9, 13, 17, 21, 346 5, 9, 13, 17 347 }; 348 349 void VP8IteratorStartI4(VP8EncIterator* const it) { 350 VP8Encoder* const enc = it->enc_; 351 int i; 352 353 it->i4_ = 0; // first 4x4 sub-block 354 it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[0]; 355 356 // Import the boundary samples 357 for (i = 0; i < 17; ++i) { // left 358 it->i4_boundary_[i] = enc->y_left_[15 - i]; 359 } 360 for (i = 0; i < 16; ++i) { // top 361 it->i4_boundary_[17 + i] = enc->y_top_[it->x_ * 16 + i]; 362 } 363 // top-right samples have a special case on the far right of the picture 364 if (it->x_ < enc->mb_w_ - 1) { 365 for (i = 16; i < 16 + 4; ++i) { 366 it->i4_boundary_[17 + i] = enc->y_top_[it->x_ * 16 + i]; 367 } 368 } else { // else, replicate the last valid pixel four times 369 for (i = 16; i < 16 + 4; ++i) { 370 it->i4_boundary_[17 + i] = it->i4_boundary_[17 + 15]; 371 } 372 } 373 VP8IteratorNzToBytes(it); // import the non-zero context 374 } 375 376 int VP8IteratorRotateI4(VP8EncIterator* const it, 377 const uint8_t* const yuv_out) { 378 const uint8_t* const blk = yuv_out + VP8Scan[it->i4_]; 379 uint8_t* const top = it->i4_top_; 380 int i; 381 382 // Update the cache with 7 fresh samples 383 for (i = 0; i <= 3; ++i) { 384 top[-4 + i] = blk[i + 3 * BPS]; // store future top samples 385 } 386 if ((it->i4_ & 3) != 3) { // if not on the right sub-blocks #3, #7, #11, #15 387 for (i = 0; i <= 2; ++i) { // store future left samples 388 top[i] = blk[3 + (2 - i) * BPS]; 389 } 390 } else { // else replicate top-right samples, as says the specs. 
391 for (i = 0; i <= 3; ++i) { 392 top[i] = top[i + 4]; 393 } 394 } 395 // move pointers to next sub-block 396 it->i4_++; 397 if (it->i4_ == 16) { // we're done 398 return 0; 399 } 400 401 it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[it->i4_]; 402 return 1; 403 } 404 405 //----------------------------------------------------------------------------- 406 407 #if defined(__cplusplus) || defined(c_plusplus) 408 } // extern "C" 409 #endif 410