1 /* ------------------------------------------------------------------ 2 * Copyright (C) 1998-2009 PacketVideo 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 * express or implied. 14 * See the License for the specific language governing permissions 15 * and limitations under the License. 16 * ------------------------------------------------------------------- 17 */ 18 #include "mp4def.h" 19 #include "mp4lib_int.h" 20 #include "mp4enc_lib.h" 21 #include "dct.h" 22 #include "m4venc_oscl.h" 23 24 /* ======================================================================== */ 25 /* Function : CodeMB_H263( ) */ 26 /* Date : 8/15/2001 */ 27 /* Purpose : Perform residue calc (only zero MV), DCT, H263 Quant/Dequant,*/ 28 /* IDCT and motion compensation.Modified from FastCodeMB() */ 29 /* Input : */ 30 /* video Video encoder data structure */ 31 /* function Approximate DCT function, scaling and threshold */ 32 /* ncoefblck Array for last nonzero coeff for speedup in VlcEncode */ 33 /* QP Combined offset from the origin to the current */ 34 /* macroblock and QP for current MB. */ 35 /* Output : */ 36 /* video->outputMB Quantized DCT coefficients. */ 37 /* currVop->yChan,uChan,vChan Reconstructed pixels */ 38 /* */ 39 /* Return : PV_STATUS */ 40 /* Modified : */ 41 /* 2/26/01 42 -modified threshold based on correlation coeff 0.75 only for mode H.263 43 -ncoefblck[] as input, to keep position of last non-zero coeff*/ 44 /* 8/10/01 45 -modified threshold based on correlation coeff 0.5 46 -used column threshold to speedup column DCT. 47 -used bitmap zigzag to speedup RunLevel(). */ 48 /* ======================================================================== */ 49 50 PV_STATUS CodeMB_H263(VideoEncData *video, approxDCT *function, Int QP, Int ncoefblck[]) 51 { 52 Int sad, k, CBP, mbnum = video->mbnum; 53 Short *output, *dataBlock; 54 UChar Mode = video->headerInfo.Mode[mbnum]; 55 UChar *bitmapcol, *bitmaprow = video->bitmaprow; 56 UInt *bitmapzz ; 57 UChar shortHeader = video->vol[video->currLayer]->shortVideoHeader; 58 Int dc_scaler = 8; 59 Int intra = (Mode == MODE_INTRA || Mode == MODE_INTRA_Q); 60 struct QPstruct QuantParam; 61 Int dctMode, DctTh1; 62 Int ColTh; 63 Int(*BlockQuantDequantH263)(Short *, Short *, struct QPstruct *, 64 UChar[], UChar *, UInt *, Int, Int, Int, UChar); 65 Int(*BlockQuantDequantH263DC)(Short *, Short *, struct QPstruct *, 66 UChar *, UInt *, Int, UChar); 67 void (*BlockDCT1x1)(Short *, UChar *, UChar *, Int); 68 void (*BlockDCT2x2)(Short *, UChar *, UChar *, Int); 69 void (*BlockDCT4x4)(Short *, UChar *, UChar *, Int); 70 void (*BlockDCT8x8)(Short *, UChar *, UChar *, Int); 71 72 /* motion comp. related var. */ 73 Vop *currVop = video->currVop; 74 VideoEncFrameIO *inputFrame = video->input; 75 Int ind_x = video->outputMB->mb_x; 76 Int ind_y = video->outputMB->mb_y; 77 Int lx = currVop->pitch; 78 Int width = currVop->width; 79 UChar *rec, *input, *pred; 80 Int offset = QP >> 5; /* QP is combined offset and QP */ 81 Int offsetc = (offset >> 2) + (ind_x << 2); /* offset for chrom */ 82 /*****************************/ 83 84 OSCL_UNUSED_ARG(function); 85 86 output = video->outputMB->block[0]; 87 CBP = 0; 88 QP = QP & 0x1F; 89 // M4VENC_MEMSET(output,0,(sizeof(Short)<<6)*6); /* reset quantized coeff. to zero , 7/24/01*/ 90 91 QuantParam.QPx2 = QP << 1; 92 QuantParam.QP = QP; 93 QuantParam.QPdiv2 = QP >> 1; 94 QuantParam.QPx2plus = QuantParam.QPx2 + QuantParam.QPdiv2; 95 QuantParam.Addition = QP - 1 + (QP & 0x1); 96 97 if (intra) 98 { 99 BlockDCT1x1 = &Block1x1DCTIntra; 100 BlockDCT2x2 = &Block2x2DCT_AANIntra; 101 BlockDCT4x4 = &Block4x4DCT_AANIntra; 102 BlockDCT8x8 = &BlockDCT_AANIntra; 103 BlockQuantDequantH263 = &BlockQuantDequantH263Intra; 104 BlockQuantDequantH263DC = &BlockQuantDequantH263DCIntra; 105 if (shortHeader) 106 { 107 dc_scaler = 8; 108 } 109 else 110 { 111 dc_scaler = cal_dc_scalerENC(QP, 1); /* luminance blocks */ 112 } 113 DctTh1 = (Int)(dc_scaler * 3);//*1.829 114 ColTh = ColThIntra[QP]; 115 } 116 else 117 { 118 BlockDCT1x1 = &Block1x1DCTwSub; 119 BlockDCT2x2 = &Block2x2DCT_AANwSub; 120 BlockDCT4x4 = &Block4x4DCT_AANwSub; 121 BlockDCT8x8 = &BlockDCT_AANwSub; 122 123 BlockQuantDequantH263 = &BlockQuantDequantH263Inter; 124 BlockQuantDequantH263DC = &BlockQuantDequantH263DCInter; 125 ColTh = ColThInter[QP]; 126 DctTh1 = (Int)(16 * QP); //9*QP; 127 } 128 129 rec = currVop->yChan + offset; 130 input = inputFrame->yChan + offset; 131 if (lx != width) input -= (ind_y << 9); /* non-padded offset */ 132 133 dataBlock = video->dataBlock; 134 pred = video->predictedMB; 135 136 for (k = 0; k < 6; k++) 137 { 138 CBP <<= 1; 139 bitmapcol = video->bitmapcol[k]; 140 bitmapzz = video->bitmapzz[k]; /* 7/30/01 */ 141 if (k < 4) 142 { 143 sad = video->mot[mbnum][k+1].sad; 144 if (k&1) 145 { 146 rec += 8; 147 input += 8; 148 } 149 else if (k == 2) 150 { 151 dctMode = ((width << 3) - 8); 152 input += dctMode; 153 dctMode = ((lx << 3) - 8); 154 rec += dctMode; 155 } 156 } 157 else 158 { 159 if (k == 4) 160 { 161 rec = currVop->uChan + offsetc; 162 input = inputFrame->uChan + offsetc; 163 if (lx != width) input -= (ind_y << 7); 164 lx >>= 1; 165 width >>= 1; 166 if (intra) 167 { 168 sad = getBlockSum(input, width); 169 if (shortHeader) 170 dc_scaler = 8; 171 else 172 { 173 dc_scaler = cal_dc_scalerENC(QP, 2); /* chrominance blocks */ 174 } 175 DctTh1 = (Int)(dc_scaler * 3);//*1.829 176 } 177 else 178 sad = Sad8x8(input, pred, width); 179 } 180 else 181 { 182 rec = currVop->vChan + offsetc; 183 input = inputFrame->vChan + offsetc; 184 if (lx != width) input -= (ind_y << 7); 185 if (intra) 186 { 187 sad = getBlockSum(input, width); 188 } 189 else 190 sad = Sad8x8(input, pred, width); 191 } 192 } 193 194 if (sad < DctTh1 && !(shortHeader && intra)) /* all-zero */ 195 { /* For shortHeader intra block, DC value cannot be zero */ 196 dctMode = 0; 197 CBP |= 0; 198 ncoefblck[k] = 0; 199 } 200 else if (sad < 18*QP/*(QP<<4)*/) /* DC-only */ 201 { 202 dctMode = 1; 203 BlockDCT1x1(dataBlock, input, pred, width); 204 205 CBP |= (*BlockQuantDequantH263DC)(dataBlock, output, &QuantParam, 206 bitmaprow + k, bitmapzz, dc_scaler, shortHeader); 207 ncoefblck[k] = 1; 208 } 209 else 210 { 211 212 dataBlock[64] = ColTh; 213 214 if (sad < 22*QP/*(QP<<4)+(QP<<1)*/) /* 2x2 DCT */ 215 { 216 dctMode = 2; 217 BlockDCT2x2(dataBlock, input, pred, width); 218 ncoefblck[k] = 6; 219 } 220 else if (sad < (QP << 5)) /* 4x4 DCT */ 221 { 222 dctMode = 4; 223 BlockDCT4x4(dataBlock, input, pred, width); 224 ncoefblck[k] = 26; 225 } 226 else /* Full-DCT */ 227 { 228 dctMode = 8; 229 BlockDCT8x8(dataBlock, input, pred, width); 230 ncoefblck[k] = 64; 231 } 232 233 CBP |= (*BlockQuantDequantH263)(dataBlock, output, &QuantParam, 234 bitmapcol, bitmaprow + k, bitmapzz, dctMode, k, dc_scaler, shortHeader); 235 } 236 BlockIDCTMotionComp(dataBlock, bitmapcol, bitmaprow[k], dctMode, rec, pred, (lx << 1) | intra); 237 output += 64; 238 if (!(k&1)) 239 { 240 pred += 8; 241 } 242 else 243 { 244 pred += 120; 245 } 246 } 247 248 video->headerInfo.CBP[mbnum] = CBP; /* 5/18/2001 */ 249 return PV_SUCCESS; 250 } 251 252 #ifndef NO_MPEG_QUANT 253 /* ======================================================================== */ 254 /* Function : CodeMB_MPEG( ) */ 255 /* Date : 8/15/2001 */ 256 /* Purpose : Perform residue calc (only zero MV), DCT, MPEG Quant/Dequant,*/ 257 /* IDCT and motion compensation.Modified from FastCodeMB() */ 258 /* Input : */ 259 /* video Video encoder data structure */ 260 /* function Approximate DCT function, scaling and threshold */ 261 /* ncoefblck Array for last nonzero coeff for speedup in VlcEncode */ 262 /* QP Combined offset from the origin to the current */ 263 /* macroblock and QP for current MB. */ 264 /* Output : */ 265 /* video->outputMB Quantized DCT coefficients. */ 266 /* currVop->yChan,uChan,vChan Reconstructed pixels */ 267 /* */ 268 /* Return : PV_STATUS */ 269 /* Modified : */ 270 /* 2/26/01 271 -modified threshold based on correlation coeff 0.75 only for mode H.263 272 -ncoefblck[] as input, keep position of last non-zero coeff*/ 273 /* 8/10/01 274 -modified threshold based on correlation coeff 0.5 275 -used column threshold to speedup column DCT. 276 -used bitmap zigzag to speedup RunLevel(). */ 277 /* ======================================================================== */ 278 279 PV_STATUS CodeMB_MPEG(VideoEncData *video, approxDCT *function, Int QP, Int ncoefblck[]) 280 { 281 Int sad, k, CBP, mbnum = video->mbnum; 282 Short *output, *dataBlock; 283 UChar Mode = video->headerInfo.Mode[mbnum]; 284 UChar *bitmapcol, *bitmaprow = video->bitmaprow; 285 UInt *bitmapzz ; 286 Int dc_scaler = 8; 287 Vol *currVol = video->vol[video->currLayer]; 288 Int intra = (Mode == MODE_INTRA || Mode == MODE_INTRA_Q); 289 Int *qmat; 290 Int dctMode, DctTh1, DctTh2, DctTh3, DctTh4; 291 Int ColTh; 292 293 Int(*BlockQuantDequantMPEG)(Short *, Short *, Int, Int *, 294 UChar [], UChar *, UInt *, Int, Int, Int); 295 Int(*BlockQuantDequantMPEGDC)(Short *, Short *, Int, Int *, 296 UChar [], UChar *, UInt *, Int); 297 298 void (*BlockDCT1x1)(Short *, UChar *, UChar *, Int); 299 void (*BlockDCT2x2)(Short *, UChar *, UChar *, Int); 300 void (*BlockDCT4x4)(Short *, UChar *, UChar *, Int); 301 void (*BlockDCT8x8)(Short *, UChar *, UChar *, Int); 302 303 /* motion comp. related var. */ 304 Vop *currVop = video->currVop; 305 VideoEncFrameIO *inputFrame = video->input; 306 Int ind_x = video->outputMB->mb_x; 307 Int ind_y = video->outputMB->mb_y; 308 Int lx = currVop->pitch; 309 Int width = currVop->width; 310 UChar *rec, *input, *pred; 311 Int offset = QP >> 5; 312 Int offsetc = (offset >> 2) + (ind_x << 2); /* offset for chrom */ 313 /*****************************/ 314 315 OSCL_UNUSED_ARG(function); 316 317 output = video->outputMB->block[0]; 318 CBP = 0; 319 QP = QP & 0x1F; 320 // M4VENC_MEMSET(output,0,(sizeof(Short)<<6)*6); /* reset quantized coeff. to zero , 7/24/01*/ 321 322 if (intra) 323 { 324 BlockDCT1x1 = &Block1x1DCTIntra; 325 BlockDCT2x2 = &Block2x2DCT_AANIntra; 326 BlockDCT4x4 = &Block4x4DCT_AANIntra; 327 BlockDCT8x8 = &BlockDCT_AANIntra; 328 329 BlockQuantDequantMPEG = &BlockQuantDequantMPEGIntra; 330 BlockQuantDequantMPEGDC = &BlockQuantDequantMPEGDCIntra; 331 dc_scaler = cal_dc_scalerENC(QP, 1); /* luminance blocks */ 332 qmat = currVol->iqmat; 333 DctTh1 = (Int)(3 * dc_scaler);//2*dc_scaler); 334 DctTh2 = (Int)((1.25 * QP - 1) * qmat[1] * 0.45);//0.567);//0.567); 335 DctTh3 = (Int)((1.25 * QP - 1) * qmat[2] * 0.55);//1.162); /* 8/2/2001 */ 336 DctTh4 = (Int)((1.25 * QP - 1) * qmat[32] * 0.8);//1.7583);//0.7942); 337 ColTh = ColThIntra[QP]; 338 } 339 else 340 { 341 BlockDCT1x1 = &Block1x1DCTwSub; 342 BlockDCT2x2 = &Block2x2DCT_AANwSub; 343 BlockDCT4x4 = &Block4x4DCT_AANwSub; 344 BlockDCT8x8 = &BlockDCT_AANwSub; 345 346 BlockQuantDequantMPEG = &BlockQuantDequantMPEGInter; 347 BlockQuantDequantMPEGDC = &BlockQuantDequantMPEGDCInter; 348 qmat = currVol->niqmat; 349 DctTh1 = (Int)(((QP << 1) - 0.5) * qmat[0] * 0.4);//0.2286);//0.3062); 350 DctTh2 = (Int)(((QP << 1) - 0.5) * qmat[1] * 0.45);//0.567);//0.4); 351 DctTh3 = (Int)(((QP << 1) - 0.5) * qmat[2] * 0.55);//1.162); /* 8/2/2001 */ 352 DctTh4 = (Int)(((QP << 1) - 0.5) * qmat[32] * 0.8);//1.7583);//0.7942); 353 ColTh = ColThInter[QP]; 354 }// get qmat, DctTh1, DctTh2, DctTh3 355 356 rec = currVop->yChan + offset; 357 input = inputFrame->yChan + offset; 358 if (lx != width) input -= (ind_y << 9); /* non-padded offset */ 359 360 dataBlock = video->dataBlock; 361 pred = video->predictedMB; 362 363 for (k = 0; k < 6; k++) 364 { 365 CBP <<= 1; 366 bitmapcol = video->bitmapcol[k]; 367 bitmapzz = video->bitmapzz[k]; /* 8/2/01 */ 368 if (k < 4) 369 {//Y block 370 sad = video->mot[mbnum][k+1].sad; 371 if (k&1) 372 { 373 rec += 8; 374 input += 8; 375 } 376 else if (k == 2) 377 { 378 dctMode = ((width << 3) - 8); 379 input += dctMode; 380 dctMode = ((lx << 3) - 8); 381 rec += dctMode; 382 } 383 } 384 else 385 {// U, V block 386 if (k == 4) 387 { 388 rec = currVop->uChan + offsetc; 389 input = inputFrame->uChan + offsetc; 390 if (lx != width) input -= (ind_y << 7); 391 lx >>= 1; 392 width >>= 1; 393 if (intra) 394 { 395 dc_scaler = cal_dc_scalerENC(QP, 2); /* luminance blocks */ 396 DctTh1 = dc_scaler * 3; 397 sad = getBlockSum(input, width); 398 } 399 else 400 sad = Sad8x8(input, pred, width); 401 } 402 else 403 { 404 rec = currVop->vChan + offsetc; 405 input = inputFrame->vChan + offsetc; 406 if (lx != width) input -= (ind_y << 7); 407 if (intra) 408 sad = getBlockSum(input, width); 409 else 410 sad = Sad8x8(input, pred, width); 411 } 412 } 413 414 if (sad < DctTh1) /* all-zero */ 415 { 416 dctMode = 0; 417 CBP |= 0; 418 ncoefblck[k] = 0; 419 } 420 else if (sad < DctTh2) /* DC-only */ 421 { 422 dctMode = 1; 423 BlockDCT1x1(dataBlock, input, pred, width); 424 425 CBP |= (*BlockQuantDequantMPEGDC)(dataBlock, output, QP, qmat, 426 bitmapcol, bitmaprow + k, bitmapzz, dc_scaler); 427 ncoefblck[k] = 1; 428 } 429 else 430 { 431 dataBlock[64] = ColTh; 432 433 if (sad < DctTh3) /* 2x2-DCT */ 434 { 435 dctMode = 2; 436 BlockDCT2x2(dataBlock, input, pred, width); 437 ncoefblck[k] = 6; 438 } 439 else if (sad < DctTh4) /* 4x4 DCT */ 440 { 441 dctMode = 4; 442 BlockDCT4x4(dataBlock, input, pred, width); 443 ncoefblck[k] = 26; 444 } 445 else /* full-DCT */ 446 { 447 dctMode = 8; 448 BlockDCT8x8(dataBlock, input, pred, width); 449 ncoefblck[k] = 64; 450 } 451 452 CBP |= (*BlockQuantDequantMPEG)(dataBlock, output, QP, qmat, 453 bitmapcol, bitmaprow + k, bitmapzz, dctMode, k, dc_scaler); // 454 } 455 dctMode = 8; /* for mismatch handle */ 456 BlockIDCTMotionComp(dataBlock, bitmapcol, bitmaprow[k], dctMode, rec, pred, (lx << 1) | (intra)); 457 458 output += 64; 459 if (!(k&1)) 460 { 461 pred += 8; 462 } 463 else 464 { 465 pred += 120; 466 } 467 } 468 469 video->headerInfo.CBP[mbnum] = CBP; /* 5/18/2001 */ 470 return PV_SUCCESS; 471 } 472 473 #endif 474 475 /* ======================================================================== */ 476 /* Function : getBlockSAV( ) */ 477 /* Date : 8/10/2000 */ 478 /* Purpose : Get SAV for one block */ 479 /* In/out : block[64] contain one block data */ 480 /* Return : */ 481 /* Modified : */ 482 /* ======================================================================== */ 483 /* can be written in MMX or SSE, 2/22/2001 */ 484 Int getBlockSAV(Short block[]) 485 { 486 Int i, val, sav = 0; 487 488 i = 8; 489 while (i--) 490 { 491 val = *block++; 492 if (val > 0) sav += val; 493 else sav -= val; 494 val = *block++; 495 if (val > 0) sav += val; 496 else sav -= val; 497 val = *block++; 498 if (val > 0) sav += val; 499 else sav -= val; 500 val = *block++; 501 if (val > 0) sav += val; 502 else sav -= val; 503 val = *block++; 504 if (val > 0) sav += val; 505 else sav -= val; 506 val = *block++; 507 if (val > 0) sav += val; 508 else sav -= val; 509 val = *block++; 510 if (val > 0) sav += val; 511 else sav -= val; 512 val = *block++; 513 if (val > 0) sav += val; 514 else sav -= val; 515 } 516 517 return sav; 518 519 } 520 521 /* ======================================================================== */ 522 /* Function : Sad8x8( ) */ 523 /* Date : 8/10/2000 */ 524 /* Purpose : Find SAD between prev block and current block */ 525 /* In/out : Previous and current frame block pointers, and frame width */ 526 /* Return : */ 527 /* Modified : */ 528 /* 8/15/01, - do 4 pixel at a time assuming 32 bit register */ 529 /* ======================================================================== */ 530 #ifdef __clang__ 531 __attribute((no_sanitize("integer"))) 532 #endif 533 Int Sad8x8(UChar *cur, UChar *prev, Int width) 534 { 535 UChar *end = cur + (width << 3); 536 Int sad = 0; 537 Int *curInt = (Int*) cur; 538 Int *prevInt = (Int*) prev; 539 Int cur1, cur2, prev1, prev2; 540 UInt mask, sgn_msk = 0x80808080; 541 Int sum2 = 0, sum4 = 0; 542 Int tmp; 543 do 544 { 545 mask = ~(0xFF00); 546 cur1 = curInt[1]; /* load cur[4..7] */ 547 cur2 = curInt[0]; 548 curInt += (width >> 2); /* load cur[0..3] and +=lx */ 549 prev1 = prevInt[1]; 550 prev2 = prevInt[0]; 551 prevInt += 4; 552 553 tmp = prev2 ^ cur2; 554 cur2 = prev2 - cur2; 555 tmp = tmp ^ cur2; /* (^)^(-) last bit is one if carry */ 556 tmp = sgn_msk & ((UInt)tmp >> 1); /* check the sign of each byte */ 557 if (cur2 < 0) tmp = tmp | 0x80000000; /* corcurt sign of first byte */ 558 tmp = (tmp << 8) - tmp; /* carry borrowed bytes are marked with 0x1FE */ 559 cur2 = cur2 + (tmp >> 7); /* negative bytes is added with 0xFF, -1 */ 560 cur2 = cur2 ^(tmp >> 7); /* take absolute by inverting bits (EOR) */ 561 562 tmp = prev1 ^ cur1; 563 cur1 = prev1 - cur1; 564 tmp = tmp ^ cur1; /* (^)^(-) last bit is one if carry */ 565 tmp = sgn_msk & ((UInt)tmp >> 1); /* check the sign of each byte */ 566 if (cur1 < 0) tmp = tmp | 0x80000000; /* corcurt sign of first byte */ 567 tmp = (tmp << 8) - tmp; /* carry borrowed bytes are marked with 0x1FE */ 568 cur1 = cur1 + (tmp >> 7); /* negative bytes is added with 0xFF, -1 */ 569 cur1 = cur1 ^(tmp >> 7); /* take absolute by inverting bits (EOR) */ 570 571 sum4 = sum4 + cur1; 572 cur1 = cur1 & (mask << 8); /* mask first and third bytes */ 573 sum2 = sum2 + ((UInt)cur1 >> 8); 574 sum4 = sum4 + cur2; 575 cur2 = cur2 & (mask << 8); /* mask first and third bytes */ 576 sum2 = sum2 + ((UInt)cur2 >> 8); 577 } 578 while ((uintptr_t)curInt < (uintptr_t)end); 579 580 cur1 = sum4 - (sum2 << 8); /* get even-sum */ 581 cur1 = cur1 + sum2; /* add 16 bit even-sum and odd-sum*/ 582 cur1 = cur1 + (cur1 << 16); /* add upper and lower 16 bit sum */ 583 sad = ((UInt)cur1 >> 16); /* take upper 16 bit */ 584 return sad; 585 } 586 587 /* ======================================================================== */ 588 /* Function : getBlockSum( ) */ 589 /* Date : 8/10/2000 */ 590 /* Purpose : Find summation of value within a block. */ 591 /* In/out : Pointer to current block in a frame and frame width */ 592 /* Return : */ 593 /* Modified : */ 594 /* 8/15/01, - SIMD 4 pixels at a time */ 595 /* ======================================================================== */ 596 #ifdef __clang__ 597 __attribute((no_sanitize("integer"))) 598 #endif 599 Int getBlockSum(UChar *cur, Int width) 600 { 601 Int sad = 0, sum4 = 0, sum2 = 0; 602 UChar *end = cur + (width << 3); 603 Int *curInt = (Int*)cur; 604 UInt mask = ~(0xFF00); 605 Int load1, load2; 606 607 do 608 { 609 load1 = curInt[1]; 610 load2 = curInt[0]; 611 curInt += (width >> 2); 612 sum4 += load1; 613 load1 = load1 & (mask << 8); /* even bytes */ 614 sum2 += ((UInt)load1 >> 8); /* sum even bytes, 16 bit */ 615 sum4 += load2; 616 load2 = load2 & (mask << 8); /* even bytes */ 617 sum2 += ((UInt)load2 >> 8); /* sum even bytes, 16 bit */ 618 } 619 while ((uintptr_t)curInt < (uintptr_t)end); 620 load1 = sum4 - (sum2 << 8); /* get even-sum */ 621 load1 = load1 + sum2; /* add 16 bit even-sum and odd-sum*/ 622 load1 = load1 + (load1 << 16); /* add upper and lower 16 bit sum */ 623 sad = ((UInt)load1 >> 16); /* take upper 16 bit */ 624 625 return sad; 626 } 627 628