1 /* ------------------------------------------------------------------ 2 * Copyright (C) 1998-2009 PacketVideo 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 * express or implied. 14 * See the License for the specific language governing permissions 15 * and limitations under the License. 16 * ------------------------------------------------------------------- 17 */ 18 #include "avcenc_lib.h" 19 20 /* subtract with the prediction and do transformation */ 21 void trans(uint8 *cur, int pitch, uint8 *predBlock, int16 *dataBlock) 22 { 23 int16 *ptr = dataBlock; 24 int r0, r1, r2, r3, j; 25 int curpitch = (uint)pitch >> 16; 26 int predpitch = (pitch & 0xFFFF); 27 28 /* horizontal */ 29 j = 4; 30 while (j > 0) 31 { 32 /* calculate the residue first */ 33 r0 = cur[0] - predBlock[0]; 34 r1 = cur[1] - predBlock[1]; 35 r2 = cur[2] - predBlock[2]; 36 r3 = cur[3] - predBlock[3]; 37 38 r0 += r3; //ptr[0] + ptr[3]; 39 r3 = r0 - (r3 << 1); //ptr[0] - ptr[3]; 40 r1 += r2; //ptr[1] + ptr[2]; 41 r2 = r1 - (r2 << 1); //ptr[1] - ptr[2]; 42 43 ptr[0] = r0 + r1; 44 ptr[2] = r0 - r1; 45 ptr[1] = (r3 << 1) + r2; 46 ptr[3] = r3 - (r2 << 1); 47 48 ptr += 16; 49 predBlock += predpitch; 50 cur += curpitch; 51 j--; 52 } 53 /* vertical */ 54 ptr = dataBlock; 55 j = 4; 56 while (j > 0) 57 { 58 r0 = ptr[0] + ptr[48]; 59 r3 = ptr[0] - ptr[48]; 60 r1 = ptr[16] + ptr[32]; 61 r2 = ptr[16] - ptr[32]; 62 63 ptr[0] = r0 + r1; 64 ptr[32] = r0 - r1; 65 ptr[16] = (r3 << 1) + r2; 66 ptr[48] = r3 - (r2 << 1); 67 68 ptr++; 69 j--; 70 } 71 72 return ; 73 } 74 75 76 /* do residue transform quant invquant, invtrans and write output out */ 77 int dct_luma(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org, int *coef_cost) 78 { 79 AVCCommonObj *video = encvid->common; 80 int org_pitch = encvid->currInput->pitch; 81 int pitch = video->currPic->pitch; 82 int16 *coef = video->block; 83 uint8 *pred = video->pred_block; // size 16 for a 4x4 block 84 int pred_pitch = video->pred_pitch; 85 int r0, r1, r2, r3, j, k, idx; 86 int *level, *run; 87 int Qq, Rq, q_bits, qp_const, quant; 88 int data, lev, zero_run; 89 int numcoeff; 90 91 coef += ((blkidx & 0x3) << 2) + ((blkidx >> 2) << 6); /* point to the 4x4 block */ 92 93 /* first take a 4x4 transform */ 94 /* horizontal */ 95 j = 4; 96 while (j > 0) 97 { 98 /* calculate the residue first */ 99 r0 = org[0] - pred[0]; /* OPTIMIZEABLE */ 100 r1 = org[1] - pred[1]; 101 r2 = org[2] - pred[2]; 102 r3 = org[3] - pred[3]; 103 104 r0 += r3; //ptr[0] + ptr[3]; 105 r3 = r0 - (r3 << 1); //ptr[0] - ptr[3]; 106 r1 += r2; //ptr[1] + ptr[2]; 107 r2 = r1 - (r2 << 1); //ptr[1] - ptr[2]; 108 109 coef[0] = r0 + r1; 110 coef[2] = r0 - r1; 111 coef[1] = (r3 << 1) + r2; 112 coef[3] = r3 - (r2 << 1); 113 114 coef += 16; 115 org += org_pitch; 116 pred += pred_pitch; 117 j--; 118 } 119 /* vertical */ 120 coef -= 64; 121 pred -= (pred_pitch << 2); 122 j = 4; 123 while (j > 0) /* OPTIMIZABLE */ 124 { 125 r0 = coef[0] + coef[48]; 126 r3 = coef[0] - coef[48]; 127 r1 = coef[16] + coef[32]; 128 r2 = coef[16] - coef[32]; 129 130 coef[0] = r0 + r1; 131 coef[32] = r0 - r1; 132 coef[16] = (r3 << 1) + r2; 133 coef[48] = r3 - (r2 << 1); 134 135 coef++; 136 j--; 137 } 138 139 coef -= 4; 140 141 /* quant */ 142 level = encvid->level[ras2dec[blkidx]]; 143 run = encvid->run[ras2dec[blkidx]]; 144 145 Rq = video->QPy_mod_6; 146 Qq = video->QPy_div_6; 147 qp_const = encvid->qp_const; 148 q_bits = 15 + Qq; 149 150 zero_run = 0; 151 numcoeff = 0; 152 for (k = 0; k < 16; k++) 153 { 154 idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */ 155 data = coef[idx]; 156 quant = quant_coef[Rq][k]; 157 if (data > 0) 158 { 159 lev = data * quant + qp_const; 160 } 161 else 162 { 163 lev = -data * quant + qp_const; 164 } 165 lev >>= q_bits; 166 if (lev) 167 { 168 *coef_cost += ((lev > 1) ? MAX_VALUE : COEFF_COST[DISABLE_THRESHOLDING][zero_run]); 169 170 /* dequant */ 171 quant = dequant_coefres[Rq][k]; 172 if (data > 0) 173 { 174 level[numcoeff] = lev; 175 coef[idx] = (lev * quant) << Qq; 176 } 177 else 178 { 179 level[numcoeff] = -lev; 180 coef[idx] = (-lev * quant) << Qq; 181 } 182 run[numcoeff++] = zero_run; 183 zero_run = 0; 184 } 185 else 186 { 187 zero_run++; 188 coef[idx] = 0; 189 } 190 } 191 192 if (video->currMB->mb_intra) // only do inverse transform with intra block 193 { 194 if (numcoeff) /* then do inverse transform */ 195 { 196 for (j = 4; j > 0; j--) /* horizontal */ 197 { 198 r0 = coef[0] + coef[2]; 199 r1 = coef[0] - coef[2]; 200 r2 = (coef[1] >> 1) - coef[3]; 201 r3 = coef[1] + (coef[3] >> 1); 202 203 coef[0] = r0 + r3; 204 coef[1] = r1 + r2; 205 coef[2] = r1 - r2; 206 coef[3] = r0 - r3; 207 208 coef += 16; 209 } 210 211 coef -= 64; 212 for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */ 213 { 214 r0 = coef[0] + coef[32]; 215 r1 = coef[0] - coef[32]; 216 r2 = (coef[16] >> 1) - coef[48]; 217 r3 = coef[16] + (coef[48] >> 1); 218 r0 += r3; 219 r3 = (r0 - (r3 << 1)); /* r0-r3 */ 220 r1 += r2; 221 r2 = (r1 - (r2 << 1)); /* r1-r2 */ 222 r0 += 32; 223 r1 += 32; 224 r2 += 32; 225 r3 += 32; 226 227 r0 = pred[0] + (r0 >> 6); 228 if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ 229 r1 = *(pred += pred_pitch) + (r1 >> 6); 230 if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ 231 r2 = *(pred += pred_pitch) + (r2 >> 6); 232 if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ 233 r3 = pred[pred_pitch] + (r3 >> 6); 234 if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ 235 236 *cur = r0; 237 *(cur += pitch) = r1; 238 *(cur += pitch) = r2; 239 cur[pitch] = r3; 240 cur -= (pitch << 1); 241 cur++; 242 pred -= (pred_pitch << 1); 243 pred++; 244 coef++; 245 } 246 } 247 else // copy from pred to cur 248 { 249 *((uint32*)cur) = *((uint32*)pred); 250 *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch)); 251 *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch)); 252 *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch)); 253 } 254 } 255 256 return numcoeff; 257 } 258 259 260 void MBInterIdct(AVCCommonObj *video, uint8 *curL, AVCMacroblock *currMB, int picPitch) 261 { 262 int16 *coef, *coef8 = video->block; 263 uint8 *cur; // the same as curL 264 int b8, b4; 265 int r0, r1, r2, r3, j, blkidx; 266 267 for (b8 = 0; b8 < 4; b8++) 268 { 269 cur = curL; 270 coef = coef8; 271 272 if (currMB->CBP&(1 << b8)) 273 { 274 for (b4 = 0; b4 < 4; b4++) 275 { 276 blkidx = blkIdx2blkXY[b8][b4]; 277 /* do IDCT */ 278 if (currMB->nz_coeff[blkidx]) 279 { 280 for (j = 4; j > 0; j--) /* horizontal */ 281 { 282 r0 = coef[0] + coef[2]; 283 r1 = coef[0] - coef[2]; 284 r2 = (coef[1] >> 1) - coef[3]; 285 r3 = coef[1] + (coef[3] >> 1); 286 287 coef[0] = r0 + r3; 288 coef[1] = r1 + r2; 289 coef[2] = r1 - r2; 290 coef[3] = r0 - r3; 291 292 coef += 16; 293 } 294 295 coef -= 64; 296 for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */ 297 { 298 r0 = coef[0] + coef[32]; 299 r1 = coef[0] - coef[32]; 300 r2 = (coef[16] >> 1) - coef[48]; 301 r3 = coef[16] + (coef[48] >> 1); 302 r0 += r3; 303 r3 = (r0 - (r3 << 1)); /* r0-r3 */ 304 r1 += r2; 305 r2 = (r1 - (r2 << 1)); /* r1-r2 */ 306 r0 += 32; 307 r1 += 32; 308 r2 += 32; 309 r3 += 32; 310 311 r0 = cur[0] + (r0 >> 6); 312 if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ 313 *cur = r0; 314 r1 = *(cur += picPitch) + (r1 >> 6); 315 if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ 316 *cur = r1; 317 r2 = *(cur += picPitch) + (r2 >> 6); 318 if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ 319 *cur = r2; 320 r3 = cur[picPitch] + (r3 >> 6); 321 if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ 322 cur[picPitch] = r3; 323 324 cur -= (picPitch << 1); 325 cur++; 326 coef++; 327 } 328 cur -= 4; 329 coef -= 4; 330 } 331 if (b4&1) 332 { 333 cur += ((picPitch << 2) - 4); 334 coef += 60; 335 } 336 else 337 { 338 cur += 4; 339 coef += 4; 340 } 341 } 342 } 343 344 if (b8&1) 345 { 346 curL += ((picPitch << 3) - 8); 347 coef8 += 120; 348 } 349 else 350 { 351 curL += 8; 352 coef8 += 8; 353 } 354 } 355 356 return ; 357 } 358 359 /* performa dct, quant, iquant, idct for the entire MB */ 360 void dct_luma_16x16(AVCEncObject *encvid, uint8 *curL, uint8 *orgL) 361 { 362 AVCCommonObj *video = encvid->common; 363 int pitch = video->currPic->pitch; 364 int org_pitch = encvid->currInput->pitch; 365 AVCMacroblock *currMB = video->currMB; 366 int16 *coef = video->block; 367 uint8 *pred = encvid->pred_i16[currMB->i16Mode]; 368 int blk_x, blk_y, j, k, idx, b8, b4; 369 int r0, r1, r2, r3, m0, m1, m2 , m3; 370 int data, lev; 371 int *level, *run, zero_run, ncoeff; 372 int Rq, Qq, quant, q_bits, qp_const; 373 int offset_cur[4], offset_pred[4], offset; 374 375 /* horizontal */ 376 for (j = 16; j > 0; j--) 377 { 378 for (blk_x = 4; blk_x > 0; blk_x--) 379 { 380 /* calculate the residue first */ 381 r0 = *orgL++ - *pred++; 382 r1 = *orgL++ - *pred++; 383 r2 = *orgL++ - *pred++; 384 r3 = *orgL++ - *pred++; 385 386 r0 += r3; //ptr[0] + ptr[3]; 387 r3 = r0 - (r3 << 1); //ptr[0] - ptr[3]; 388 r1 += r2; //ptr[1] + ptr[2]; 389 r2 = r1 - (r2 << 1); //ptr[1] - ptr[2]; 390 391 *coef++ = r0 + r1; 392 *coef++ = (r3 << 1) + r2; 393 *coef++ = r0 - r1; 394 *coef++ = r3 - (r2 << 1); 395 } 396 orgL += (org_pitch - 16); 397 } 398 pred -= 256; 399 coef -= 256; 400 /* vertical */ 401 for (blk_y = 4; blk_y > 0; blk_y--) 402 { 403 for (j = 16; j > 0; j--) 404 { 405 r0 = coef[0] + coef[48]; 406 r3 = coef[0] - coef[48]; 407 r1 = coef[16] + coef[32]; 408 r2 = coef[16] - coef[32]; 409 410 coef[0] = r0 + r1; 411 coef[32] = r0 - r1; 412 coef[16] = (r3 << 1) + r2; 413 coef[48] = r3 - (r2 << 1); 414 415 coef++; 416 } 417 coef += 48; 418 } 419 420 /* then perform DC transform */ 421 coef -= 256; 422 for (j = 4; j > 0; j--) 423 { 424 r0 = coef[0] + coef[12]; 425 r3 = coef[0] - coef[12]; 426 r1 = coef[4] + coef[8]; 427 r2 = coef[4] - coef[8]; 428 429 coef[0] = r0 + r1; 430 coef[8] = r0 - r1; 431 coef[4] = r3 + r2; 432 coef[12] = r3 - r2; 433 coef += 64; 434 } 435 coef -= 256; 436 for (j = 4; j > 0; j--) 437 { 438 r0 = coef[0] + coef[192]; 439 r3 = coef[0] - coef[192]; 440 r1 = coef[64] + coef[128]; 441 r2 = coef[64] - coef[128]; 442 443 coef[0] = (r0 + r1) >> 1; 444 coef[128] = (r0 - r1) >> 1; 445 coef[64] = (r3 + r2) >> 1; 446 coef[192] = (r3 - r2) >> 1; 447 coef += 4; 448 } 449 450 coef -= 16; 451 // then quantize DC 452 level = encvid->leveldc; 453 run = encvid->rundc; 454 455 Rq = video->QPy_mod_6; 456 Qq = video->QPy_div_6; 457 quant = quant_coef[Rq][0]; 458 q_bits = 15 + Qq; 459 qp_const = encvid->qp_const; 460 461 zero_run = 0; 462 ncoeff = 0; 463 for (k = 0; k < 16; k++) /* in zigzag scan order */ 464 { 465 idx = ZIGZAG2RASTERDC[k]; 466 data = coef[idx]; 467 if (data > 0) // quant 468 { 469 lev = data * quant + (qp_const << 1); 470 } 471 else 472 { 473 lev = -data * quant + (qp_const << 1); 474 } 475 lev >>= (q_bits + 1); 476 if (lev) // dequant 477 { 478 if (data > 0) 479 { 480 level[ncoeff] = lev; 481 coef[idx] = lev; 482 } 483 else 484 { 485 level[ncoeff] = -lev; 486 coef[idx] = -lev; 487 } 488 run[ncoeff++] = zero_run; 489 zero_run = 0; 490 } 491 else 492 { 493 zero_run++; 494 coef[idx] = 0; 495 } 496 } 497 498 /* inverse transform DC */ 499 encvid->numcoefdc = ncoeff; 500 if (ncoeff) 501 { 502 quant = dequant_coefres[Rq][0]; 503 504 for (j = 0; j < 4; j++) 505 { 506 m0 = coef[0] + coef[4]; 507 m1 = coef[0] - coef[4]; 508 m2 = coef[8] + coef[12]; 509 m3 = coef[8] - coef[12]; 510 511 512 coef[0] = m0 + m2; 513 coef[4] = m0 - m2; 514 coef[8] = m1 - m3; 515 coef[12] = m1 + m3; 516 coef += 64; 517 } 518 519 coef -= 256; 520 521 if (Qq >= 2) /* this way should be faster than JM */ 522 { /* they use (((m4*scale)<<(QPy/6))+2)>>2 for both cases. */ 523 Qq -= 2; 524 for (j = 0; j < 4; j++) 525 { 526 m0 = coef[0] + coef[64]; 527 m1 = coef[0] - coef[64]; 528 m2 = coef[128] + coef[192]; 529 m3 = coef[128] - coef[192]; 530 531 coef[0] = ((m0 + m2) * quant) << Qq; 532 coef[64] = ((m0 - m2) * quant) << Qq; 533 coef[128] = ((m1 - m3) * quant) << Qq; 534 coef[192] = ((m1 + m3) * quant) << Qq; 535 coef += 4; 536 } 537 Qq += 2; /* restore the value */ 538 } 539 else 540 { 541 Qq = 2 - Qq; 542 offset = 1 << (Qq - 1); 543 544 for (j = 0; j < 4; j++) 545 { 546 m0 = coef[0] + coef[64]; 547 m1 = coef[0] - coef[64]; 548 m2 = coef[128] + coef[192]; 549 m3 = coef[128] - coef[192]; 550 551 coef[0] = (((m0 + m2) * quant + offset) >> Qq); 552 coef[64] = (((m0 - m2) * quant + offset) >> Qq); 553 coef[128] = (((m1 - m3) * quant + offset) >> Qq); 554 coef[192] = (((m1 + m3) * quant + offset) >> Qq); 555 coef += 4; 556 } 557 Qq = 2 - Qq; /* restore the value */ 558 } 559 coef -= 16; /* back to the origin */ 560 } 561 562 /* now zigzag scan ac coefs, quant, iquant and itrans */ 563 run = encvid->run[0]; 564 level = encvid->level[0]; 565 566 /* offset btw 4x4 block */ 567 offset_cur[0] = 0; 568 offset_cur[1] = (pitch << 2) - 8; 569 570 /* offset btw 8x8 block */ 571 offset_cur[2] = 8 - (pitch << 3); 572 offset_cur[3] = -8; 573 574 /* similarly for pred */ 575 offset_pred[0] = 0; 576 offset_pred[1] = 56; 577 offset_pred[2] = -120; 578 offset_pred[3] = -8; 579 580 currMB->CBP = 0; 581 582 for (b8 = 0; b8 < 4; b8++) 583 { 584 for (b4 = 0; b4 < 4; b4++) 585 { 586 587 zero_run = 0; 588 ncoeff = 0; 589 590 for (k = 1; k < 16; k++) 591 { 592 idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */ 593 data = coef[idx]; 594 quant = quant_coef[Rq][k]; 595 if (data > 0) 596 { 597 lev = data * quant + qp_const; 598 } 599 else 600 { 601 lev = -data * quant + qp_const; 602 } 603 lev >>= q_bits; 604 if (lev) 605 { /* dequant */ 606 quant = dequant_coefres[Rq][k]; 607 if (data > 0) 608 { 609 level[ncoeff] = lev; 610 coef[idx] = (lev * quant) << Qq; 611 } 612 else 613 { 614 level[ncoeff] = -lev; 615 coef[idx] = (-lev * quant) << Qq; 616 } 617 run[ncoeff++] = zero_run; 618 zero_run = 0; 619 } 620 else 621 { 622 zero_run++; 623 coef[idx] = 0; 624 } 625 } 626 627 currMB->nz_coeff[blkIdx2blkXY[b8][b4]] = ncoeff; /* in raster scan !!! */ 628 if (ncoeff) 629 { 630 currMB->CBP |= (1 << b8); 631 632 // do inverse transform here 633 for (j = 4; j > 0; j--) 634 { 635 r0 = coef[0] + coef[2]; 636 r1 = coef[0] - coef[2]; 637 r2 = (coef[1] >> 1) - coef[3]; 638 r3 = coef[1] + (coef[3] >> 1); 639 640 coef[0] = r0 + r3; 641 coef[1] = r1 + r2; 642 coef[2] = r1 - r2; 643 coef[3] = r0 - r3; 644 645 coef += 16; 646 } 647 coef -= 64; 648 for (j = 4; j > 0; j--) 649 { 650 r0 = coef[0] + coef[32]; 651 r1 = coef[0] - coef[32]; 652 r2 = (coef[16] >> 1) - coef[48]; 653 r3 = coef[16] + (coef[48] >> 1); 654 655 r0 += r3; 656 r3 = (r0 - (r3 << 1)); /* r0-r3 */ 657 r1 += r2; 658 r2 = (r1 - (r2 << 1)); /* r1-r2 */ 659 r0 += 32; 660 r1 += 32; 661 r2 += 32; 662 r3 += 32; 663 r0 = pred[0] + (r0 >> 6); 664 if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ 665 r1 = pred[16] + (r1 >> 6); 666 if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ 667 r2 = pred[32] + (r2 >> 6); 668 if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ 669 r3 = pred[48] + (r3 >> 6); 670 if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ 671 *curL = r0; 672 *(curL += pitch) = r1; 673 *(curL += pitch) = r2; 674 curL[pitch] = r3; 675 curL -= (pitch << 1); 676 curL++; 677 pred++; 678 coef++; 679 } 680 } 681 else // do DC-only inverse 682 { 683 m0 = coef[0] + 32; 684 685 for (j = 4; j > 0; j--) 686 { 687 r0 = pred[0] + (m0 >> 6); 688 if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ 689 r1 = pred[16] + (m0 >> 6); 690 if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ 691 r2 = pred[32] + (m0 >> 6); 692 if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ 693 r3 = pred[48] + (m0 >> 6); 694 if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ 695 *curL = r0; 696 *(curL += pitch) = r1; 697 *(curL += pitch) = r2; 698 curL[pitch] = r3; 699 curL -= (pitch << 1); 700 curL++; 701 pred++; 702 } 703 coef += 4; 704 } 705 706 run += 16; // follow coding order 707 level += 16; 708 curL += offset_cur[b4&1]; 709 pred += offset_pred[b4&1]; 710 coef += offset_pred[b4&1]; 711 } 712 713 curL += offset_cur[2 + (b8&1)]; 714 pred += offset_pred[2 + (b8&1)]; 715 coef += offset_pred[2 + (b8&1)]; 716 } 717 718 return ; 719 } 720 721 722 void dct_chroma(AVCEncObject *encvid, uint8 *curC, uint8 *orgC, int cr) 723 { 724 AVCCommonObj *video = encvid->common; 725 AVCMacroblock *currMB = video->currMB; 726 int org_pitch = (encvid->currInput->pitch) >> 1; 727 int pitch = (video->currPic->pitch) >> 1; 728 int pred_pitch = 16; 729 int16 *coef = video->block + 256; 730 uint8 *pred = video->pred_block; 731 int j, blk_x, blk_y, k, idx, b4; 732 int r0, r1, r2, r3, m0; 733 int Qq, Rq, qp_const, q_bits, quant; 734 int *level, *run, zero_run, ncoeff; 735 int data, lev; 736 int offset_cur[2], offset_pred[2], offset_coef[2]; 737 uint8 nz_temp[4]; 738 int coeff_cost; 739 740 if (cr) 741 { 742 coef += 8; 743 pred += 8; 744 } 745 746 if (currMB->mb_intra == 0) // inter mode 747 { 748 pred = curC; 749 pred_pitch = pitch; 750 } 751 752 /* do 4x4 transform */ 753 /* horizontal */ 754 for (j = 8; j > 0; j--) 755 { 756 for (blk_x = 2; blk_x > 0; blk_x--) 757 { 758 /* calculate the residue first */ 759 r0 = *orgC++ - *pred++; 760 r1 = *orgC++ - *pred++; 761 r2 = *orgC++ - *pred++; 762 r3 = *orgC++ - *pred++; 763 764 r0 += r3; //ptr[0] + ptr[3]; 765 r3 = r0 - (r3 << 1); //ptr[0] - ptr[3]; 766 r1 += r2; //ptr[1] + ptr[2]; 767 r2 = r1 - (r2 << 1); //ptr[1] - ptr[2]; 768 769 *coef++ = r0 + r1; 770 *coef++ = (r3 << 1) + r2; 771 *coef++ = r0 - r1; 772 *coef++ = r3 - (r2 << 1); 773 774 } 775 coef += 8; // coef pitch is 16 776 pred += (pred_pitch - 8); // pred_pitch is 16 777 orgC += (org_pitch - 8); 778 } 779 pred -= (pred_pitch << 3); 780 coef -= 128; 781 /* vertical */ 782 for (blk_y = 2; blk_y > 0; blk_y--) 783 { 784 for (j = 8; j > 0; j--) 785 { 786 r0 = coef[0] + coef[48]; 787 r3 = coef[0] - coef[48]; 788 r1 = coef[16] + coef[32]; 789 r2 = coef[16] - coef[32]; 790 791 coef[0] = r0 + r1; 792 coef[32] = r0 - r1; 793 coef[16] = (r3 << 1) + r2; 794 coef[48] = r3 - (r2 << 1); 795 796 coef++; 797 } 798 coef += 56; 799 } 800 /* then perform DC transform */ 801 coef -= 128; 802 803 /* 2x2 transform of DC components*/ 804 r0 = coef[0]; 805 r1 = coef[4]; 806 r2 = coef[64]; 807 r3 = coef[68]; 808 809 coef[0] = r0 + r1 + r2 + r3; 810 coef[4] = r0 - r1 + r2 - r3; 811 coef[64] = r0 + r1 - r2 - r3; 812 coef[68] = r0 - r1 - r2 + r3; 813 814 Qq = video->QPc_div_6; 815 Rq = video->QPc_mod_6; 816 quant = quant_coef[Rq][0]; 817 q_bits = 15 + Qq; 818 qp_const = encvid->qp_const_c; 819 820 zero_run = 0; 821 ncoeff = 0; 822 run = encvid->runcdc + (cr << 2); 823 level = encvid->levelcdc + (cr << 2); 824 825 /* in zigzag scan order */ 826 for (k = 0; k < 4; k++) 827 { 828 idx = ((k >> 1) << 6) + ((k & 1) << 2); 829 data = coef[idx]; 830 if (data > 0) 831 { 832 lev = data * quant + (qp_const << 1); 833 } 834 else 835 { 836 lev = -data * quant + (qp_const << 1); 837 } 838 lev >>= (q_bits + 1); 839 if (lev) 840 { 841 if (data > 0) 842 { 843 level[ncoeff] = lev; 844 coef[idx] = lev; 845 } 846 else 847 { 848 level[ncoeff] = -lev; 849 coef[idx] = -lev; 850 } 851 run[ncoeff++] = zero_run; 852 zero_run = 0; 853 } 854 else 855 { 856 zero_run++; 857 coef[idx] = 0; 858 } 859 } 860 861 encvid->numcoefcdc[cr] = ncoeff; 862 863 if (ncoeff) 864 { 865 currMB->CBP |= (1 << 4); // DC present 866 // do inverse transform 867 quant = dequant_coefres[Rq][0]; 868 869 r0 = coef[0] + coef[4]; 870 r1 = coef[0] - coef[4]; 871 r2 = coef[64] + coef[68]; 872 r3 = coef[64] - coef[68]; 873 874 r0 += r2; 875 r2 = r0 - (r2 << 1); 876 r1 += r3; 877 r3 = r1 - (r3 << 1); 878 879 if (Qq >= 1) 880 { 881 Qq -= 1; 882 coef[0] = (r0 * quant) << Qq; 883 coef[4] = (r1 * quant) << Qq; 884 coef[64] = (r2 * quant) << Qq; 885 coef[68] = (r3 * quant) << Qq; 886 Qq++; 887 } 888 else 889 { 890 coef[0] = (r0 * quant) >> 1; 891 coef[4] = (r1 * quant) >> 1; 892 coef[64] = (r2 * quant) >> 1; 893 coef[68] = (r3 * quant) >> 1; 894 } 895 } 896 897 /* now do AC zigzag scan, quant, iquant and itrans */ 898 if (cr) 899 { 900 run = encvid->run[20]; 901 level = encvid->level[20]; 902 } 903 else 904 { 905 run = encvid->run[16]; 906 level = encvid->level[16]; 907 } 908 909 /* offset btw 4x4 block */ 910 offset_cur[0] = 0; 911 offset_cur[1] = (pitch << 2) - 8; 912 offset_pred[0] = 0; 913 offset_pred[1] = (pred_pitch << 2) - 8; 914 offset_coef[0] = 0; 915 offset_coef[1] = 56; 916 917 coeff_cost = 0; 918 919 for (b4 = 0; b4 < 4; b4++) 920 { 921 zero_run = 0; 922 ncoeff = 0; 923 for (k = 1; k < 16; k++) /* in zigzag scan order */ 924 { 925 idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */ 926 data = coef[idx]; 927 quant = quant_coef[Rq][k]; 928 if (data > 0) 929 { 930 lev = data * quant + qp_const; 931 } 932 else 933 { 934 lev = -data * quant + qp_const; 935 } 936 lev >>= q_bits; 937 if (lev) 938 { 939 /* for RD performance*/ 940 if (lev > 1) 941 coeff_cost += MAX_VALUE; // set high cost, shall not be discarded 942 else 943 coeff_cost += COEFF_COST[DISABLE_THRESHOLDING][zero_run]; 944 945 /* dequant */ 946 quant = dequant_coefres[Rq][k]; 947 if (data > 0) 948 { 949 level[ncoeff] = lev; 950 coef[idx] = (lev * quant) << Qq; 951 } 952 else 953 { 954 level[ncoeff] = -lev; 955 coef[idx] = (-lev * quant) << Qq; 956 } 957 run[ncoeff++] = zero_run; 958 zero_run = 0; 959 } 960 else 961 { 962 zero_run++; 963 coef[idx] = 0; 964 } 965 } 966 967 nz_temp[b4] = ncoeff; // raster scan 968 969 // just advance the pointers for now, do IDCT later 970 coef += 4; 971 run += 16; 972 level += 16; 973 coef += offset_coef[b4&1]; 974 } 975 976 /* rewind the pointers */ 977 coef -= 128; 978 979 if (coeff_cost < _CHROMA_COEFF_COST_) 980 { 981 /* if it's not efficient to encode any blocks. 982 Just do DC only */ 983 /* We can reset level and run also, but setting nz to zero should be enough. */ 984 currMB->nz_coeff[16+(cr<<1)] = 0; 985 currMB->nz_coeff[17+(cr<<1)] = 0; 986 currMB->nz_coeff[20+(cr<<1)] = 0; 987 currMB->nz_coeff[21+(cr<<1)] = 0; 988 989 for (b4 = 0; b4 < 4; b4++) 990 { 991 // do DC-only inverse 992 m0 = coef[0] + 32; 993 994 for (j = 4; j > 0; j--) 995 { 996 r0 = pred[0] + (m0 >> 6); 997 if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ 998 r1 = *(pred += pred_pitch) + (m0 >> 6); 999 if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ 1000 r2 = pred[pred_pitch] + (m0 >> 6); 1001 if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ 1002 r3 = pred[pred_pitch<<1] + (m0 >> 6); 1003 if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ 1004 *curC = r0; 1005 *(curC += pitch) = r1; 1006 *(curC += pitch) = r2; 1007 curC[pitch] = r3; 1008 curC -= (pitch << 1); 1009 curC++; 1010 pred += (1 - pred_pitch); 1011 } 1012 coef += 4; 1013 curC += offset_cur[b4&1]; 1014 pred += offset_pred[b4&1]; 1015 coef += offset_coef[b4&1]; 1016 } 1017 } 1018 else // not dropping anything, continue with the IDCT 1019 { 1020 for (b4 = 0; b4 < 4; b4++) 1021 { 1022 ncoeff = nz_temp[b4] ; // in raster scan 1023 currMB->nz_coeff[16+(b4&1)+(cr<<1)+((b4>>1)<<2)] = ncoeff; // in raster scan 1024 1025 if (ncoeff) // do a check on the nonzero-coeff 1026 { 1027 currMB->CBP |= (2 << 4); 1028 1029 // do inverse transform here 1030 for (j = 4; j > 0; j--) 1031 { 1032 r0 = coef[0] + coef[2]; 1033 r1 = coef[0] - coef[2]; 1034 r2 = (coef[1] >> 1) - coef[3]; 1035 r3 = coef[1] + (coef[3] >> 1); 1036 1037 coef[0] = r0 + r3; 1038 coef[1] = r1 + r2; 1039 coef[2] = r1 - r2; 1040 coef[3] = r0 - r3; 1041 1042 coef += 16; 1043 } 1044 coef -= 64; 1045 for (j = 4; j > 0; j--) 1046 { 1047 r0 = coef[0] + coef[32]; 1048 r1 = coef[0] - coef[32]; 1049 r2 = (coef[16] >> 1) - coef[48]; 1050 r3 = coef[16] + (coef[48] >> 1); 1051 1052 r0 += r3; 1053 r3 = (r0 - (r3 << 1)); /* r0-r3 */ 1054 r1 += r2; 1055 r2 = (r1 - (r2 << 1)); /* r1-r2 */ 1056 r0 += 32; 1057 r1 += 32; 1058 r2 += 32; 1059 r3 += 32; 1060 r0 = pred[0] + (r0 >> 6); 1061 if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ 1062 r1 = *(pred += pred_pitch) + (r1 >> 6); 1063 if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ 1064 r2 = pred[pred_pitch] + (r2 >> 6); 1065 if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ 1066 r3 = pred[pred_pitch<<1] + (r3 >> 6); 1067 if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ 1068 *curC = r0; 1069 *(curC += pitch) = r1; 1070 *(curC += pitch) = r2; 1071 curC[pitch] = r3; 1072 curC -= (pitch << 1); 1073 curC++; 1074 pred += (1 - pred_pitch); 1075 coef++; 1076 } 1077 } 1078 else 1079 { 1080 // do DC-only inverse 1081 m0 = coef[0] + 32; 1082 1083 for (j = 4; j > 0; j--) 1084 { 1085 r0 = pred[0] + (m0 >> 6); 1086 if ((uint)r0 > 0xFF) r0 = 0xFF & (~(r0 >> 31)); /* clip */ 1087 r1 = *(pred += pred_pitch) + (m0 >> 6); 1088 if ((uint)r1 > 0xFF) r1 = 0xFF & (~(r1 >> 31)); /* clip */ 1089 r2 = pred[pred_pitch] + (m0 >> 6); 1090 if ((uint)r2 > 0xFF) r2 = 0xFF & (~(r2 >> 31)); /* clip */ 1091 r3 = pred[pred_pitch<<1] + (m0 >> 6); 1092 if ((uint)r3 > 0xFF) r3 = 0xFF & (~(r3 >> 31)); /* clip */ 1093 *curC = r0; 1094 *(curC += pitch) = r1; 1095 *(curC += pitch) = r2; 1096 curC[pitch] = r3; 1097 curC -= (pitch << 1); 1098 curC++; 1099 pred += (1 - pred_pitch); 1100 } 1101 coef += 4; 1102 } 1103 curC += offset_cur[b4&1]; 1104 pred += offset_pred[b4&1]; 1105 coef += offset_coef[b4&1]; 1106 } 1107 } 1108 1109 return ; 1110 } 1111 1112 1113 /* only DC transform */ 1114 int TransQuantIntra16DC(AVCEncObject *encvid) 1115 { 1116 AVCCommonObj *video = encvid->common; 1117 int16 *block = video->block; 1118 int *level = encvid->leveldc; 1119 int *run = encvid->rundc; 1120 int16 *ptr = block; 1121 int r0, r1, r2, r3, j; 1122 int Qq = video->QPy_div_6; 1123 int Rq = video->QPy_mod_6; 1124 int q_bits, qp_const, quant; 1125 int data, lev, zero_run; 1126 int k, ncoeff, idx; 1127 1128 /* DC transform */ 1129 /* horizontal */ 1130 j = 4; 1131 while (j) 1132 { 1133 r0 = ptr[0] + ptr[12]; 1134 r3 = ptr[0] - ptr[12]; 1135 r1 = ptr[4] + ptr[8]; 1136 r2 = ptr[4] - ptr[8]; 1137 1138 ptr[0] = r0 + r1; 1139 ptr[8] = r0 - r1; 1140 ptr[4] = r3 + r2; 1141 ptr[12] = r3 - r2; 1142 ptr += 64; 1143 j--; 1144 } 1145 /* vertical */ 1146 ptr = block; 1147 j = 4; 1148 while (j) 1149 { 1150 r0 = ptr[0] + ptr[192]; 1151 r3 = ptr[0] - ptr[192]; 1152 r1 = ptr[64] + ptr[128]; 1153 r2 = ptr[64] - ptr[128]; 1154 1155 ptr[0] = (r0 + r1) >> 1; 1156 ptr[128] = (r0 - r1) >> 1; 1157 ptr[64] = (r3 + r2) >> 1; 1158 ptr[192] = (r3 - r2) >> 1; 1159 ptr += 4; 1160 j--; 1161 } 1162 1163 quant = quant_coef[Rq][0]; 1164 q_bits = 15 + Qq; 1165 qp_const = (1 << q_bits) / 3; // intra 1166 1167 zero_run = 0; 1168 ncoeff = 0; 1169 1170 for (k = 0; k < 16; k++) /* in zigzag scan order */ 1171 { 1172 idx = ZIGZAG2RASTERDC[k]; 1173 data = block[idx]; 1174 if (data > 0) 1175 { 1176 lev = data * quant + (qp_const << 1); 1177 } 1178 else 1179 { 1180 lev = -data * quant + (qp_const << 1); 1181 } 1182 lev >>= (q_bits + 1); 1183 if (lev) 1184 { 1185 if (data > 0) 1186 { 1187 level[ncoeff] = lev; 1188 block[idx] = lev; 1189 } 1190 else 1191 { 1192 level[ncoeff] = -lev; 1193 block[idx] = -lev; 1194 } 1195 run[ncoeff++] = zero_run; 1196 zero_run = 0; 1197 } 1198 else 1199 { 1200 zero_run++; 1201 block[idx] = 0; 1202 } 1203 } 1204 return ncoeff; 1205 } 1206 1207 int TransQuantChromaDC(AVCEncObject *encvid, int16 *block, int slice_type, int cr) 1208 { 1209 AVCCommonObj *video = encvid->common; 1210 int *level, *run; 1211 int r0, r1, r2, r3; 1212 int Qq, Rq, q_bits, qp_const, quant; 1213 int data, lev, zero_run; 1214 int k, ncoeff, idx; 1215 1216 level = encvid->levelcdc + (cr << 2); /* cb or cr */ 1217 run = encvid->runcdc + (cr << 2); 1218 1219 /* 2x2 transform of DC components*/ 1220 r0 = block[0]; 1221 r1 = block[4]; 1222 r2 = block[64]; 1223 r3 = block[68]; 1224 1225 block[0] = r0 + r1 + r2 + r3; 1226 block[4] = r0 - r1 + r2 - r3; 1227 block[64] = r0 + r1 - r2 - r3; 1228 block[68] = r0 - r1 - r2 + r3; 1229 1230 Qq = video->QPc_div_6; 1231 Rq = video->QPc_mod_6; 1232 quant = quant_coef[Rq][0]; 1233 q_bits = 15 + Qq; 1234 if (slice_type == AVC_I_SLICE) 1235 { 1236 qp_const = (1 << q_bits) / 3; 1237 } 1238 else 1239 { 1240 qp_const = (1 << q_bits) / 6; 1241 } 1242 1243 zero_run = 0; 1244 ncoeff = 0; 1245 1246 for (k = 0; k < 4; k++) /* in zigzag scan order */ 1247 { 1248 idx = ((k >> 1) << 6) + ((k & 1) << 2); 1249 data = block[idx]; 1250 if (data > 0) 1251 { 1252 lev = data * quant + (qp_const << 1); 1253 } 1254 else 1255 { 1256 lev = -data * quant + (qp_const << 1); 1257 } 1258 lev >>= (q_bits + 1); 1259 if (lev) 1260 { 1261 if (data > 0) 1262 { 1263 level[ncoeff] = lev; 1264 block[idx] = lev; 1265 } 1266 else 1267 { 1268 level[ncoeff] = -lev; 1269 block[idx] = -lev; 1270 } 1271 run[ncoeff++] = zero_run; 1272 zero_run = 0; 1273 } 1274 else 1275 { 1276 zero_run++; 1277 block[idx] = 0; 1278 } 1279 } 1280 return ncoeff; 1281 } 1282 1283 1284