Home | History | Annotate | Download | only in src
      1 /* ------------------------------------------------------------------
      2  * Copyright (C) 1998-2009 PacketVideo
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
     13  * express or implied.
     14  * See the License for the specific language governing permissions
     15  * and limitations under the License.
     16  * -------------------------------------------------------------------
     17  */
     18 #include "mp4lib_int.h"
     19 #include "mp4enc_lib.h"
     20 
     21 //const static Int roundtab4[] = {0,1,1,1};
     22 //const static Int roundtab8[] = {0,0,1,1,1,1,1,2};
     23 //const static Int roundtab12[] = {0,0,0,1,1,1,1,1,1,1,2,2};
     24 const static Int roundtab16[] = {0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2};
     25 
     26 #define FORWARD_MODE    1
     27 #define BACKWARD_MODE   2
     28 #define BIDIRECTION_MODE    3
     29 #define DIRECT_MODE         4
     30 
     31 #ifdef __cplusplus
     32 extern "C"
     33 {
     34 #endif
     35     /*Function Prototype */
     36     /* no-edge padding */
     37     Int EncGetPredOutside(Int xpos, Int ypos, UChar *c_prev, UChar *rec,
     38     Int width, Int height, Int rnd1);
     39 
     40     void Copy_MB_from_Vop(UChar *comp, Int yChan[][NCOEFF_BLOCK], Int width);
     41     void Copy_B_from_Vop(UChar *comp, Int cChan[], Int width);
     42     void Copy_MB_into_Vop(UChar *comp, Int yChan[][NCOEFF_BLOCK], Int width);
     43     void Copy_B_into_Vop(UChar *comp, Int cChan[], Int width);
     44     void get_MB(UChar *c_prev, UChar *c_prev_u  , UChar *c_prev_v,
     45                 Short mb[6][64], Int lx, Int lx_uv);
     46 
     47     Int GetPredAdvBy0x0(
     48         UChar *c_prev,      /* i */
     49         UChar *pred_block,      /* i */
     50         Int lx,     /* i */
     51         Int rnd1 /* i */
     52     );
     53 
     54     Int GetPredAdvBy0x1(
     55         UChar *c_prev,      /* i */
     56         UChar *pred_block,      /* i */
     57         Int lx,     /* i */
     58         Int rnd1 /* i */
     59     );
     60 
     61     Int GetPredAdvBy1x0(
     62         UChar *c_prev,      /* i */
     63         UChar *pred_block,      /* i */
     64         Int lx,     /* i */
     65         Int rnd1 /* i */
     66     );
     67 
     68     Int GetPredAdvBy1x1(
     69         UChar *c_prev,      /* i */
     70         UChar *pred_block,      /* i */
     71         Int lx,     /* i */
     72         Int rnd1 /* i */
     73     );
     74 
     75     static Int(*const GetPredAdvBTable[2][2])(UChar*, UChar*, Int, Int) =
     76     {
     77         {&GetPredAdvBy0x0, &GetPredAdvBy0x1},
     78         {&GetPredAdvBy1x0, &GetPredAdvBy1x1}
     79     };
     80 
     81 
     82 #ifdef __cplusplus
     83 }
     84 #endif
     85 
     86 
     87 /* ======================================================================== */
     88 /*  Function : getMotionCompensatedMB( )                                    */
     89 /*  Date     : 4/17/2001                                                    */
     90 /*  Purpose  : Get the motion compensate block into video->predictionMB     */
     91 /*              and generate video->predictionErrorMB                       */
     92 /*              modified from MBMotionComp() function in the decoder        */
     93 /*  In/out   :                                                              */
     94 /*  Return   :                                                              */
     95 /*  Modified :                                                              */
     96 /* ======================================================================== */
     97 
     98 void getMotionCompensatedMB(VideoEncData *video, Int ind_x, Int ind_y, Int offset)
     99 {
    100     Vop *prevVop = video->forwardRefVop; //reference frame
    101     Vop *currVop = video->currVop;
    102     Int mbnum = video->mbnum;       //mb index
    103     MOT *mot = video->mot[mbnum];
    104     Int ypos, xpos;
    105     UChar *c_prev, *cu_prev, *cv_prev;
    106     UChar *c_rec, *cu_rec, *cv_rec;
    107     Int height, pitch, pitch_uv, height_uv;
    108     Int mode = video->headerInfo.Mode[mbnum];  /* get mode */
    109     Int dx, dy;
    110     Int xpred, ypred;
    111     Int xsum, ysum;
    112     Int round1;
    113 
    114     OSCL_UNUSED_ARG(offset);
    115 
    116     round1 = (Int)(1 - video->currVop->roundingType);
    117 
    118     pitch  = currVop->pitch;
    119     height = currVop->height;
    120     pitch_uv  = pitch >> 1;
    121     height_uv = height >> 1;
    122 
    123     ypos = ind_y << 4 ;
    124     xpos = ind_x << 4 ;
    125 
    126     c_rec = video->predictedMB;
    127     cu_rec = video->predictedMB + 256;
    128     cv_rec = video->predictedMB + 264;
    129 
    130     if (mode == MODE_INTER || mode == MODE_INTER_Q)
    131     {
    132         /* Motion vector in x direction       */
    133         dx = mot[0].x;
    134         dy = mot[0].y;
    135 
    136         c_prev  = prevVop->yChan;
    137 
    138         xpred = (xpos << 1) + dx ;
    139         ypred = (ypos << 1) + dy ;
    140 
    141         /* Call function that performs luminance prediction */
    142         EncPrediction_INTER(xpred, ypred, c_prev, c_rec,
    143                             pitch, round1);
    144 
    145         if ((dx & 3) == 0)  dx = dx >> 1;
    146         else        dx = (dx >> 1) | 1;
    147 
    148         if ((dy & 3) == 0)      dy = dy >> 1;
    149         else        dy = (dy >> 1) | 1;
    150 
    151         xpred = xpos + dx;
    152         ypred = ypos + dy;
    153 
    154         cu_prev = prevVop->uChan;
    155         cv_prev = prevVop->vChan;
    156 
    157         EncPrediction_Chrom(xpred, ypred, cu_prev, cv_prev, cu_rec, cv_rec,
    158                             pitch_uv, (currVop->width) >> 1, height_uv, round1);
    159     }
    160 #ifndef NO_INTER4V
    161     else if (mode == MODE_INTER4V)
    162     {
    163         c_prev  = prevVop->yChan;
    164         cu_prev = prevVop->uChan;
    165         cv_prev = prevVop->vChan;
    166 
    167         EncPrediction_INTER4V(xpos, ypos, mot, c_prev, c_rec,
    168                               pitch, round1);
    169 
    170         xsum = mot[1].x + mot[2].x + mot[3].x + mot[4].x;
    171         ysum = mot[1].y + mot[2].y + mot[3].y + mot[4].y;
    172 
    173         dx = PV_SIGN(xsum) * (roundtab16[(PV_ABS(xsum)) & 0xF] +
    174                               (((PV_ABS(xsum)) >> 4) << 1));
    175         dy = PV_SIGN(ysum) * (roundtab16[(PV_ABS(ysum)) & 0xF] +
    176                               (((PV_ABS(ysum)) >> 4) << 1));
    177 
    178         ypred = ypos + dy;
    179         xpred = xpos + dx;
    180 
    181         EncPrediction_Chrom(xpred, ypred, cu_prev, cv_prev, cu_rec, cv_rec,
    182                             pitch_uv, (currVop->width) >> 1, height_uv, round1);
    183     }
    184 #endif
    185     else
    186     {
    187         ;//printf("Error, MODE_SKIPPED is not decided yet!\n");
    188     }
    189 
    190     return ;
    191 }
    192 
    193 /***************************************************************************
    194     Function:   EncPrediction_INTER
    195     Date:       04/17/2001
    196     Purpose:    Get predicted area for luminance and compensate with the residue.
    197                 Modified from luminance_pred_mode_inter() in decoder.
    198 ***************************************************************************/
    199 
    200 void EncPrediction_INTER(
    201     Int xpred,          /* i */
    202     Int ypred,          /* i */
    203     UChar *c_prev,          /* i */
    204     UChar *c_rec,       /* i */
    205     Int lx,         /* i */
    206     Int round1          /* i */
    207 )
    208 {
    209     c_prev += (xpred >> 1) + ((ypred >> 1) * lx);
    210 
    211     GetPredAdvBTable[ypred&1][xpred&1](c_prev, c_rec, lx, round1);
    212 
    213     c_prev += B_SIZE;
    214     c_rec += B_SIZE;
    215 
    216     GetPredAdvBTable[ypred&1][xpred&1](c_prev, c_rec, lx, round1);
    217 
    218     c_prev += (lx << 3) - B_SIZE;
    219     c_rec += (16 << 3) - B_SIZE; /* padding */
    220 
    221     GetPredAdvBTable[ypred&1][xpred&1](c_prev, c_rec, lx, round1);
    222 
    223     c_prev += B_SIZE;
    224     c_rec += B_SIZE;
    225 
    226     GetPredAdvBTable[ypred&1][xpred&1](c_prev, c_rec, lx, round1);
    227 
    228     return;
    229 }
    230 
    231 #ifndef NO_INTER4V
    232 /***************************************************************************
    233     Function:   EncPrediction_INTER4V
    234     Date:       04/17/2001
    235     Purpose:    Get predicted area for luminance and compensate with the residue.
    236                 Modified from luminance_pred_mode_inter4v() in decoder.
    237 ***************************************************************************/
    238 
    239 void EncPrediction_INTER4V(
    240     Int xpos,           /* i */
    241     Int ypos,           /* i */
    242     MOT *mot,           /* i */
    243     UChar *c_prev,          /* i */
    244     UChar *c_rec,           /* i */
    245     Int lx,         /* i */
    246     Int round1          /* i */
    247 )
    248 {
    249     Int ypred, xpred;
    250 
    251     xpred = (Int)((xpos << 1) + mot[1].x);
    252     ypred = (Int)((ypos << 1) + mot[1].y);
    253 
    254     GetPredAdvBTable[ypred&1][xpred&1](c_prev + (xpred >> 1) + ((ypred >> 1)*lx),
    255                                        c_rec, lx, round1);
    256 
    257     c_rec += B_SIZE;
    258 
    259     xpred = (Int)(((xpos + B_SIZE) << 1) + mot[2].x);
    260     ypred = (Int)((ypos << 1) + mot[2].y);
    261 
    262     GetPredAdvBTable[ypred&1][xpred&1](c_prev + (xpred >> 1) + ((ypred >> 1)*lx),
    263                                        c_rec, lx, round1);
    264 
    265     c_rec += (16 << 3) - B_SIZE; /* padding */
    266 
    267     xpred = (Int)((xpos << 1) + mot[3].x);
    268     ypred = (Int)(((ypos + B_SIZE) << 1) + mot[3].y);
    269 
    270     GetPredAdvBTable[ypred&1][xpred&1](c_prev + (xpred >> 1) + ((ypred >> 1)*lx),
    271                                        c_rec, lx, round1);
    272 
    273     c_rec += B_SIZE;
    274 
    275     xpred = (Int)(((xpos + B_SIZE) << 1) + mot[4].x);
    276     ypred = (Int)(((ypos + B_SIZE) << 1) + mot[4].y);
    277 
    278     GetPredAdvBTable[ypred&1][xpred&1](c_prev + (xpred >> 1) + ((ypred >> 1)*lx),
    279                                        c_rec, lx, round1);
    280 
    281     return;
    282 }
    283 #endif /* NO_INTER4V */
    284 
    285 /***************************************************************************
    286     Function:   EncPrediction_Chrom
    287     Date:       04/17/2001
    288     Purpose:    Get predicted area for chrominance and compensate with the residue.
    289                 Modified from chrominance_pred() in decoder.
    290 ***************************************************************************/
    291 
    292 void EncPrediction_Chrom(
    293     Int xpred,          /* i */
    294     Int ypred,          /* i */
    295     UChar *cu_prev,         /* i */
    296     UChar *cv_prev,         /* i */
    297     UChar *cu_rec,
    298     UChar *cv_rec,
    299     Int lx,
    300     Int width_uv,           /* i */
    301     Int height_uv,          /* i */
    302     Int round1          /* i */
    303 )
    304 {
    305     /* check whether the MV points outside the frame */
    306     /* Compute prediction for Chrominance b block (block[4]) */
    307     if (xpred >= 0 && xpred <= ((width_uv << 1) - (2*B_SIZE)) && ypred >= 0 &&
    308             ypred <= ((height_uv << 1) - (2*B_SIZE)))
    309     {
    310         /*****************************/
    311         /* (x,y) is inside the frame */
    312         /*****************************/
    313 
    314         /* Compute prediction for Chrominance b (block[4]) */
    315         GetPredAdvBTable[ypred&1][xpred&1](cu_prev + (xpred >> 1) + ((ypred >> 1)*lx),
    316                                            cu_rec, lx, round1);
    317 
    318         /* Compute prediction for Chrominance r (block[5]) */
    319         GetPredAdvBTable[ypred&1][xpred&1](cv_prev + (xpred >> 1) + ((ypred >> 1)*lx),
    320                                            cv_rec,  lx, round1);
    321     }
    322     else
    323     {
    324         /******************************/
    325         /* (x,y) is outside the frame */
    326         /******************************/
    327 
    328         /* Compute prediction for Chrominance b (block[4]) */
    329         EncGetPredOutside(xpred, ypred,
    330                           cu_prev, cu_rec,
    331                           width_uv, height_uv, round1);
    332 
    333         /* Compute prediction for Chrominance r (block[5]) */
    334         EncGetPredOutside(xpred, ypred,
    335                           cv_prev, cv_rec,
    336                           width_uv, height_uv, round1);
    337     }
    338 
    339     return;
    340 }
    341 /***************************************************************************
    342     Function:   GetPredAdvancedB
    343     Date:       04/17/2001
    344     Purpose:    Get predicted area (block) and compensate with the residue.
    345                 - modified from GetPredAdvancedBAdd in decoder.
    346     Intput/Output:
    347     Modified:
    348 ***************************************************************************/
    349 
    350 Int GetPredAdvBy0x0(
    351     UChar *prev,        /* i */
    352     UChar *rec,     /* i */
    353     Int lx,     /* i */
    354     Int rnd /* i */
    355 )
    356 {
    357     Int i;      /* loop variable */
    358     ULong  pred_word, word1, word2;
    359     Int tmp;
    360 
    361     OSCL_UNUSED_ARG(rnd);
    362 
    363     /* initialize offset to adjust pixel counter */
    364     /*    the next row; full-pel resolution      */
    365 
    366     tmp = (ULong)prev & 0x3;
    367 
    368     if (tmp == 0)  /* word-aligned */
    369     {
    370         rec -= 16; /* preset */
    371         prev -= lx;
    372 
    373         for (i = 8; i > 0; i--)
    374         {
    375             *((ULong*)(rec += 16)) = *((ULong*)(prev += lx));
    376             *((ULong*)(rec + 4)) = *((ULong*)(prev + 4));
    377         }
    378         return 1;
    379     }
    380     else if (tmp == 1) /* first position */
    381     {
    382         prev--; /* word-aligned */
    383         rec -= 16; /* preset */
    384         prev -= lx;
    385 
    386         for (i = 8; i > 0; i--)
    387         {
    388             word1 = *((ULong*)(prev += lx)); /* read 4 bytes, b4 b3 b2 b1 */
    389             word2 = *((ULong*)(prev + 4));  /* read 4 bytes, b8 b7 b6 b5 */
    390             word1 >>= 8; /* 0 b4 b3 b2 */
    391             pred_word = word1 | (word2 << 24);  /* b5 b4 b3 b2 */
    392             *((ULong*)(rec += 16)) = pred_word;
    393 
    394             word1 = *((ULong*)(prev + 8)); /* b12 b11 b10 b9 */
    395             word2 >>= 8; /* 0 b8 b7 b6 */
    396             pred_word = word2 | (word1 << 24); /* b9 b8 b7 b6 */
    397             *((ULong*)(rec + 4)) = pred_word;
    398         }
    399 
    400         return 1;
    401     }
    402     else if (tmp == 2) /* second position */
    403     {
    404         prev -= 2; /* word1-aligned */
    405         rec -= 16; /* preset */
    406         prev -= lx;
    407 
    408         for (i = 8; i > 0; i--)
    409         {
    410             word1 = *((ULong*)(prev += lx)); /* read 4 bytes, b4 b3 b2 b1 */
    411             word2 = *((ULong*)(prev + 4));  /* read 4 bytes, b8 b7 b6 b5 */
    412             word1 >>= 16; /* 0 0 b4 b3 */
    413             pred_word = word1 | (word2 << 16);  /* b6 b5 b4 b3 */
    414             *((ULong*)(rec += 16)) = pred_word;
    415 
    416             word1 = *((ULong*)(prev + 8)); /* b12 b11 b10 b9 */
    417             word2 >>= 16; /* 0 0 b8 b7 */
    418             pred_word = word2 | (word1 << 16); /* b10 b9 b8 b7 */
    419             *((ULong*)(rec + 4)) = pred_word;
    420         }
    421 
    422         return 1;
    423     }
    424     else /* third position */
    425     {
    426         prev -= 3; /* word1-aligned */
    427         rec -= 16; /* preset */
    428         prev -= lx;
    429 
    430         for (i = 8; i > 0; i--)
    431         {
    432             word1 = *((ULong*)(prev += lx)); /* read 4 bytes, b4 b3 b2 b1 */
    433             word2 = *((ULong*)(prev + 4));  /* read 4 bytes, b8 b7 b6 b5 */
    434             word1 >>= 24; /* 0 0 0 b4 */
    435             pred_word = word1 | (word2 << 8);   /* b7 b6 b5 b4 */
    436             *((ULong*)(rec += 16)) = pred_word;
    437 
    438             word1 = *((ULong*)(prev + 8)); /* b12 b11 b10 b9 */
    439             word2 >>= 24; /* 0 0 0 b8 */
    440             pred_word = word2 | (word1 << 8); /* b11 b10 b9 b8 */
    441             *((ULong*)(rec + 4)) = pred_word;
    442 
    443         }
    444 
    445         return 1;
    446     }
    447 }
    448 /**************************************************************************/
    449 Int GetPredAdvBy0x1(
    450     UChar *prev,        /* i */
    451     UChar *rec,     /* i */
    452     Int lx,     /* i */
    453     Int rnd1 /* i */
    454 )
    455 {
    456     Int i;      /* loop variable */
    457     Int offset;
    458     ULong word1, word2, word3, word12;
    459     Int tmp;
    460     ULong mask;
    461 
    462     /* initialize offset to adjust pixel counter */
    463     /*    the next row; full-pel resolution      */
    464     offset = lx - B_SIZE; /* offset for prev */
    465 
    466     /* Branch based on pixel location (half-pel or full-pel) for x and y */
    467     rec -= 12; /* preset */
    468 
    469     tmp = (ULong)prev & 3;
    470     mask = 254;
    471     mask |= (mask << 8);
    472     mask |= (mask << 16); /* 0xFEFEFEFE */
    473 
    474     if (tmp == 0) /* word-aligned */
    475     {
    476         if (rnd1 == 1)
    477         {
    478             for (i = B_SIZE; i > 0; i--)
    479             {
    480                 word1 = *((ULong*)prev); /* b4 b3 b2 b1 */
    481                 word2 = *((ULong*)(prev += 4)); /* b8 b7 b6 b5 */
    482                 word12 = (word1 >> 8); /* 0 b4 b3 b2 */
    483                 word12 |= (word2 << 24); /* b5 b4 b3 b2 */
    484                 word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
    485                 word1 &= mask;
    486                 word3 &= (~mask); /* 0x1010101, check last bit */
    487                 word12 &= mask;
    488                 word1 >>= 1;
    489                 word1 = word1 + (word12 >> 1);
    490                 word1 += word3;
    491                 *((ULong*)(rec += 12)) = word1; /* write 4 pixels */
    492 
    493                 word1 = *((ULong*)(prev += 4)); /* b12 b11 b10 b9 */
    494                 word12 = (word2 >> 8); /* 0 b8 b7 b6 */
    495                 word12 |= (word1 << 24); /* b9 b8 b7 b6 */
    496                 word3 = word2 | word12;
    497                 word2 &= mask;
    498                 word3 &= (~mask);  /* 0x1010101, check last bit */
    499                 word12 &= mask;
    500                 word2 >>= 1;
    501                 word2 = word2 + (word12 >> 1);
    502                 word2 += word3;
    503                 *((ULong*)(rec += 4)) = word2; /* write 4 pixels */
    504 
    505                 prev += offset;
    506             }
    507             return 1;
    508         }
    509         else /* rnd1 == 0 */
    510         {
    511             for (i = B_SIZE; i > 0; i--)
    512             {
    513                 word1 = *((ULong*)prev); /* b4 b3 b2 b1 */
    514 
    515                 word2 = *((ULong*)(prev += 4)); /* b8 b7 b6 b5 */
    516                 word12 = (word1 >> 8); /* 0 b4 b3 b2 */
    517                 word12 |= (word2 << 24); /* b5 b4 b3 b2 */
    518                 word3 = word1 & word12; // rnd1 = 1; otherwise word3 = word1&word12
    519                 word1 &= mask;
    520                 word3 &= (~mask); /* 0x1010101, check last bit */
    521                 word12 &= mask;
    522                 word1 >>= 1;
    523                 word1 = word1 + (word12 >> 1);
    524                 word1 += word3;
    525                 *((ULong*)(rec += 12)) = word1; /* write 4 pixels */
    526 
    527                 word1 = *((ULong*)(prev += 4)); /* b12 b11 b10 b9 */
    528                 word12 = (word2 >> 8); /* 0 b8 b7 b6 */
    529                 word12 |= (word1 << 24); /* b9 b8 b7 b6 */
    530                 word3 = word2 & word12;
    531                 word2 &= mask;
    532                 word3 &= (~mask);  /* 0x1010101, check last bit */
    533                 word12 &= mask;
    534                 word2 >>= 1;
    535                 word2 = word2 + (word12 >> 1);
    536                 word2 += word3;
    537                 *((ULong*)(rec += 4)) = word2; /* write 4 pixels */
    538 
    539                 prev += offset;
    540             }
    541             return 1;
    542         } /* rnd1 */
    543     }
    544     else if (tmp == 1)
    545     {
    546         prev--; /* word-aligned */
    547         if (rnd1 == 1)
    548         {
    549             for (i = B_SIZE; i > 0; i--)
    550             {
    551                 word1 = *((ULong*)prev); /* b3 b2 b1 b0 */
    552                 word2 = *((ULong*)(prev += 4)); /* b7 b6 b5 b4 */
    553                 word12 = (word1 >> 8); /* 0 b3 b2 b1 */
    554                 word1 >>= 16; /* 0 0 b3 b2 */
    555                 word12 |= (word2 << 24); /* b4 b3 b2 b1 */
    556                 word1 |= (word2 << 16); /* b5 b4 b3 b2 */
    557                 word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
    558                 word1 &= mask;
    559                 word3 &= (~mask); /* 0x1010101, check last bit */
    560                 word12 &= mask;
    561                 word1 >>= 1;
    562                 word1 = word1 + (word12 >> 1);
    563                 word1 += word3;
    564                 *((ULong*)(rec += 12)) = word1; /* write 4 pixels */
    565 
    566                 word1 = *((ULong*)(prev += 4)); /* b11 b10 b9 b8 */
    567                 word12 = (word2 >> 8); /* 0 b7 b6 b5 */
    568                 word2 >>= 16; /* 0 0 b7 b6 */
    569                 word12 |= (word1 << 24); /* b8 b7 b6 b5 */
    570                 word2 |= (word1 << 16); /* b9 b8 b7 b6 */
    571                 word3 = word2 | word12; // rnd1 = 1; otherwise word3 = word2&word12
    572                 word2 &= mask;
    573                 word3 &= (~mask); /* 0x1010101, check last bit */
    574                 word12 &= mask;
    575                 word2 >>= 1;
    576                 word2 = word2 + (word12 >> 1);
    577                 word2 += word3;
    578                 *((ULong*)(rec += 4)) = word2; /* write 4 pixels */
    579 
    580                 prev += offset;
    581             }
    582             return 1;
    583         }
    584         else /* rnd1 = 0 */
    585         {
    586             for (i = B_SIZE; i > 0; i--)
    587             {
    588                 word1 = *((ULong*)prev); /* b3 b2 b1 b0 */
    589 
    590                 word2 = *((ULong*)(prev += 4)); /* b7 b6 b5 b4 */
    591                 word12 = (word1 >> 8); /* 0 b3 b2 b1 */
    592                 word1 >>= 16; /* 0 0 b3 b2 */
    593                 word12 |= (word2 << 24); /* b4 b3 b2 b1 */
    594                 word1 |= (word2 << 16); /* b5 b4 b3 b2 */
    595                 word3 = word1 & word12;
    596                 word1 &= mask;
    597                 word3 &= (~mask); /* 0x1010101, check last bit */
    598                 word12 &= mask;
    599                 word1 >>= 1;
    600                 word1 = word1 + (word12 >> 1);
    601                 word1 += word3;
    602                 *((ULong*)(rec += 12)) = word1; /* write 4 pixels */
    603 
    604                 word1 = *((ULong*)(prev += 4)); /* b11 b10 b9 b8 */
    605                 word12 = (word2 >> 8); /* 0 b7 b6 b5 */
    606                 word2 >>= 16; /* 0 0 b7 b6 */
    607                 word12 |= (word1 << 24); /* b8 b7 b6 b5 */
    608                 word2 |= (word1 << 16); /* b9 b8 b7 b6 */
    609                 word3 = word2 & word12;
    610                 word2 &= mask;
    611                 word3 &= (~mask); /* 0x1010101, check last bit */
    612                 word12 &= mask;
    613                 word2 >>= 1;
    614                 word2 = word2 + (word12 >> 1);
    615                 word2 += word3;
    616                 *((ULong*)(rec += 4)) = word2; /* write 4 pixels */
    617 
    618                 prev += offset;
    619             }
    620             return 1;
    621         } /* rnd1 */
    622     }
    623     else if (tmp == 2)
    624     {
    625         prev -= 2; /* word-aligned */
    626         if (rnd1 == 1)
    627         {
    628             for (i = B_SIZE; i > 0; i--)
    629             {
    630                 word1 = *((ULong*)prev); /* b2 b1 b0 bN1 */
    631                 word2 = *((ULong*)(prev += 4)); /* b6 b5 b4 b3 */
    632                 word12 = (word1 >> 16); /* 0 0 b2 b1 */
    633                 word1 >>= 24; /* 0 0 0 b2 */
    634                 word12 |= (word2 << 16); /* b4 b3 b2 b1 */
    635                 word1 |= (word2 << 8); /* b5 b4 b3 b2 */
    636                 word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
    637                 word1 &= mask;
    638                 word3 &= (~mask); /* 0x1010101, check last bit */
    639                 word12 &= mask;
    640                 word1 >>= 1;
    641                 word1 = word1 + (word12 >> 1);
    642                 word1 += word3;
    643                 *((ULong*)(rec += 12)) = word1; /* write 4 pixels */
    644 
    645                 word1 = *((ULong*)(prev += 4)); /* b10 b9 b8 b7 */
    646                 word12 = (word2 >> 16); /* 0 0 b6 b5 */
    647                 word2 >>= 24; /* 0 0 0 b6 */
    648                 word12 |= (word1 << 16); /* b8 b7 b6 b5 */
    649                 word2 |= (word1 << 8); /* b9 b8 b7 b6 */
    650                 word3 = word2 | word12; // rnd1 = 1; otherwise word3 = word1&word12
    651                 word2 &= mask;
    652                 word3 &= (~mask); /* 0x1010101, check last bit */
    653                 word12 &= mask;
    654                 word2 >>= 1;
    655                 word2 = word2 + (word12 >> 1);
    656                 word2 += word3;
    657                 *((ULong*)(rec += 4)) = word2; /* write 4 pixels */
    658                 prev += offset;
    659             }
    660             return 1;
    661         }
    662         else /* rnd1 == 0 */
    663         {
    664             for (i = B_SIZE; i > 0; i--)
    665             {
    666                 word1 = *((ULong*)prev); /* b2 b1 b0 bN1 */
    667                 word2 = *((ULong*)(prev += 4)); /* b6 b5 b4 b3 */
    668                 word12 = (word1 >> 16); /* 0 0 b2 b1 */
    669                 word1 >>= 24; /* 0 0 0 b2 */
    670                 word12 |= (word2 << 16); /* b4 b3 b2 b1 */
    671                 word1 |= (word2 << 8); /* b5 b4 b3 b2 */
    672                 word3 = word1 & word12; // rnd1 = 1; otherwise word3 = word1&word12
    673                 word1 &= mask;
    674                 word3 &= (~mask); /* 0x1010101, check last bit */
    675                 word12 &= mask;
    676                 word1 >>= 1;
    677                 word1 = word1 + (word12 >> 1);
    678                 word1 += word3;
    679                 *((ULong*)(rec += 12)) = word1; /* write 4 pixels */
    680 
    681                 word1 = *((ULong*)(prev += 4)); /* b10 b9 b8 b7 */
    682                 word12 = (word2 >> 16); /* 0 0 b6 b5 */
    683                 word2 >>= 24; /* 0 0 0 b6 */
    684                 word12 |= (word1 << 16); /* b8 b7 b6 b5 */
    685                 word2 |= (word1 << 8); /* b9 b8 b7 b6 */
    686                 word3 = word2 & word12; // rnd1 = 1; otherwise word3 = word1&word12
    687                 word2 &= mask;
    688                 word3 &= (~mask); /* 0x1010101, check last bit */
    689                 word12 &= mask;
    690                 word2 >>= 1;
    691                 word2 = word2 + (word12 >> 1);
    692                 word2 += word3;
    693                 *((ULong*)(rec += 4)) = word2; /* write 4 pixels */
    694                 prev += offset;
    695             }
    696             return 1;
    697         }
    698     }
    699     else /* tmp = 3 */
    700     {
    701         prev -= 3; /* word-aligned */
    702         if (rnd1 == 1)
    703         {
    704             for (i = B_SIZE; i > 0; i--)
    705             {
    706                 word1 = *((ULong*)prev); /* b1 b0 bN1 bN2 */
    707                 word2 = *((ULong*)(prev += 4)); /* b5 b4 b3 b2 */
    708                 word12 = (word1 >> 24); /* 0 0 0 b1 */
    709                 word12 |= (word2 << 8); /* b4 b3 b2 b1 */
    710                 word1 = word2;
    711                 word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
    712                 word1 &= mask;
    713                 word3 &= (~mask); /* 0x1010101, check last bit */
    714                 word12 &= mask;
    715                 word1 >>= 1;
    716                 word1 = word1 + (word12 >> 1);
    717                 word1 += word3;
    718                 *((ULong*)(rec += 12)) = word1; /* write 4 pixels */
    719 
    720                 word1 = *((ULong*)(prev += 4)); /* b9 b8 b7 b6 */
    721                 word12 = (word2 >> 24); /* 0 0 0 b5 */
    722                 word12 |= (word1 << 8); /* b8 b7 b6 b5 */
    723                 word2 = word1; /* b9 b8 b7 b6 */
    724                 word3 = word2 | word12; // rnd1 = 1; otherwise word3 = word1&word12
    725                 word2 &= mask;
    726                 word3 &= (~mask); /* 0x1010101, check last bit */
    727                 word12 &= mask;
    728                 word2 >>= 1;
    729                 word2 = word2 + (word12 >> 1);
    730                 word2 += word3;
    731                 *((ULong*)(rec += 4)) = word2; /* write 4 pixels */
    732                 prev += offset;
    733             }
    734             return 1;
    735         }
    736         else
    737         {
    738             for (i = B_SIZE; i > 0; i--)
    739             {
    740                 word1 = *((ULong*)prev); /* b1 b0 bN1 bN2 */
    741                 word2 = *((ULong*)(prev += 4)); /* b5 b4 b3 b2 */
    742                 word12 = (word1 >> 24); /* 0 0 0 b1 */
    743                 word12 |= (word2 << 8); /* b4 b3 b2 b1 */
    744                 word1 = word2;
    745                 word3 = word1 & word12; // rnd1 = 1; otherwise word3 = word1&word12
    746                 word1 &= mask;
    747                 word3 &= (~mask); /* 0x1010101, check last bit */
    748                 word12 &= mask;
    749                 word1 >>= 1;
    750                 word1 = word1 + (word12 >> 1);
    751                 word1 += word3;
    752                 *((ULong*)(rec += 12)) = word1; /* write 4 pixels */
    753 
    754                 word1 = *((ULong*)(prev += 4)); /* b9 b8 b7 b6 */
    755                 word12 = (word2 >> 24); /* 0 0 0 b5 */
    756                 word12 |= (word1 << 8); /* b8 b7 b6 b5 */
    757                 word2 = word1; /* b9 b8 b7 b6 */
    758                 word3 = word2 & word12; // rnd1 = 1; otherwise word3 = word1&word12
    759                 word2 &= mask;
    760                 word3 &= (~mask); /* 0x1010101, check last bit */
    761                 word12 &= mask;
    762                 word2 >>= 1;
    763                 word2 = word2 + (word12 >> 1);
    764                 word2 += word3;
    765                 *((ULong*)(rec += 4)) = word2; /* write 4 pixels */
    766                 prev += offset;
    767             }
    768             return 1;
    769         }
    770     }
    771 }
    772 
    773 /**************************************************************************/
    774 Int GetPredAdvBy1x0(
    775     UChar *prev,        /* i */
    776     UChar *rec,     /* i */
    777     Int lx,     /* i */
    778     Int rnd1 /* i */
    779 )
    780 {
    781     Int i;      /* loop variable */
    782     Int offset;
    783     ULong  word1, word2, word3, word12, word22;
    784     Int tmp;
    785     ULong mask;
    786 
    787     /* initialize offset to adjust pixel counter */
    788     /*    the next row; full-pel resolution      */
    789     offset = lx - B_SIZE; /* offset for prev */
    790 
    791     /* Branch based on pixel location (half-pel or full-pel) for x and y */
    792     rec -= 12; /* preset */
    793 
    794     tmp = (ULong)prev & 3;
    795     mask = 254;
    796     mask |= (mask << 8);
    797     mask |= (mask << 16); /* 0xFEFEFEFE */
    798 
    799     if (tmp == 0) /* word-aligned */
    800     {
    801         prev -= 4;
    802         if (rnd1 == 1)
    803         {
    804             for (i = B_SIZE; i > 0; i--)
    805             {
    806                 word1 = *((ULong*)(prev += 4));
    807                 word2 = *((ULong*)(prev + lx));
    808                 word3 = word1 | word2; // rnd1 = 1; otherwise word3 = word1&word2
    809                 word1 &= mask;
    810                 word3 &= (~mask); /* 0x1010101, check last bit */
    811                 word2 &= mask;
    812                 word1 >>= 1;
    813                 word1 = word1 + (word2 >> 1);
    814                 word1 += word3;
    815                 *((ULong*)(rec += 12)) = word1;
    816                 word1 = *((ULong*)(prev += 4));
    817                 word2 = *((ULong*)(prev + lx));
    818                 word3 = word1 | word2; // rnd1 = 1; otherwise word3 = word1&word2
    819                 word1 &= mask;
    820                 word3 &= (~mask); /* 0x1010101, check last bit */
    821                 word2 &= mask;
    822                 word1 >>= 1;
    823                 word1 = word1 + (word2 >> 1);
    824                 word1 += word3;
    825                 *((ULong*)(rec += 4)) = word1;
    826 
    827                 prev += offset;
    828             }
    829             return 1;
    830         }
    831         else   /* rnd1 = 0 */
    832         {
    833             for (i = B_SIZE; i > 0; i--)
    834             {
    835                 word1 = *((ULong*)(prev += 4));
    836                 word2 = *((ULong*)(prev + lx));
    837                 word3 = word1 & word2;  /* rnd1 = 0; */
    838                 word1 &= mask;
    839                 word3 &= (~mask); /* 0x1010101, check last bit */
    840                 word2 &= mask;
    841                 word1 >>= 1;
    842                 word1 = word1 + (word2 >> 1);
    843                 word1 += word3;
    844                 *((ULong*)(rec += 12)) = word1;
    845                 word1 = *((ULong*)(prev += 4));
    846                 word2 = *((ULong*)(prev + lx));
    847                 word3 = word1 & word2;  /* rnd1 = 0; */
    848                 word1 &= mask;
    849                 word3 &= (~mask); /* 0x1010101, check last bit */
    850                 word2 &= mask;
    851                 word1 >>= 1;
    852                 word1 = word1 + (word2 >> 1);
    853                 word1 += word3;
    854                 *((ULong*)(rec += 4)) = word1;
    855 
    856                 prev += offset;
    857             }
    858             return 1;
    859         }
    860     }
    861     else if (tmp == 1)
    862     {
    863         prev--; /* word-aligned */
    864         if (rnd1 == 1)
    865         {
    866             for (i = B_SIZE; i > 0; i--)
    867             {
    868                 word12 = *((ULong*)prev); /* read b4 b3 b2 b1 */
    869                 word22 = *((ULong*)(prev + lx));
    870 
    871                 word1 = *((ULong*)(prev += 4)); /* read b8 b7 b6 b5 */
    872                 word2 = *((ULong*)(prev + lx));
    873                 word12 >>= 8; /* 0 b4 b3 b2 */
    874                 word22 >>= 8;
    875                 word12 = word12 | (word1 << 24); /* b5 b4 b3 b2 */
    876                 word22 = word22 | (word2 << 24);
    877                 word3 = word12 | word22;
    878                 word12 &= mask;
    879                 word22 &= mask;
    880                 word3 &= (~mask); /* 0x1010101, check last bit */
    881                 word12 >>= 1;
    882                 word12 = word12 + (word22 >> 1);
    883                 word12 += word3;
    884                 *((ULong*)(rec += 12)) = word12;
    885 
    886                 word12 = *((ULong*)(prev += 4)); /* read b12 b11 b10 b9 */
    887                 word22 = *((ULong*)(prev + lx));
    888                 word1 >>= 8; /* 0 b8 b7 b6 */
    889                 word2 >>= 8;
    890                 word1 = word1 | (word12 << 24); /* b9 b8 b7 b6 */
    891                 word2 = word2 | (word22 << 24);
    892                 word3 = word1 | word2;
    893                 word1 &= mask;
    894                 word2 &= mask;
    895                 word3 &= (~mask); /* 0x1010101, check last bit */
    896                 word1 >>= 1;
    897                 word1 = word1 + (word2 >> 1);
    898                 word1 += word3;
    899                 *((ULong*)(rec += 4)) = word1;
    900                 prev += offset;
    901             }
    902             return 1;
    903         }
    904         else /* rnd1 = 0 */
    905         {
    906             for (i = B_SIZE; i > 0; i--)
    907             {
    908                 word12 = *((ULong*)prev); /* read b4 b3 b2 b1 */
    909                 word22 = *((ULong*)(prev + lx));
    910 
    911                 word1 = *((ULong*)(prev += 4)); /* read b8 b7 b6 b5 */
    912                 word2 = *((ULong*)(prev + lx));
    913                 word12 >>= 8; /* 0 b4 b3 b2 */
    914                 word22 >>= 8;
    915                 word12 = word12 | (word1 << 24); /* b5 b4 b3 b2 */
    916                 word22 = word22 | (word2 << 24);
    917                 word3 = word12 & word22;
    918                 word12 &= mask;
    919                 word22 &= mask;
    920                 word3 &= (~mask); /* 0x1010101, check last bit */
    921                 word12 >>= 1;
    922                 word12 = word12 + (word22 >> 1);
    923                 word12 += word3;
    924                 *((ULong*)(rec += 12)) = word12;
    925 
    926                 word12 = *((ULong*)(prev += 4)); /* read b12 b11 b10 b9 */
    927                 word22 = *((ULong*)(prev + lx));
    928                 word1 >>= 8; /* 0 b8 b7 b6 */
    929                 word2 >>= 8;
    930                 word1 = word1 | (word12 << 24); /* b9 b8 b7 b6 */
    931                 word2 = word2 | (word22 << 24);
    932                 word3 = word1 & word2;
    933                 word1 &= mask;
    934                 word2 &= mask;
    935                 word3 &= (~mask); /* 0x1010101, check last bit */
    936                 word1 >>= 1;
    937                 word1 = word1 + (word2 >> 1);
    938                 word1 += word3;
    939                 *((ULong*)(rec += 4)) = word1;
    940                 prev += offset;
    941             }
    942             return 1;
    943         }
    944     }
    945     else if (tmp == 2)
    946     {
    947         prev -= 2; /* word-aligned */
    948         if (rnd1 == 1)
    949         {
    950             for (i = B_SIZE; i > 0; i--)
    951             {
    952                 word12 = *((ULong*)prev); /* read b4 b3 b2 b1 */
    953                 word22 = *((ULong*)(prev + lx));
    954 
    955                 word1 = *((ULong*)(prev += 4)); /* read b8 b7 b6 b5 */
    956                 word2 = *((ULong*)(prev + lx));
    957                 word12 >>= 16; /* 0 0 b4 b3 */
    958                 word22 >>= 16;
    959                 word12 = word12 | (word1 << 16); /* b6 b5 b4 b3 */
    960                 word22 = word22 | (word2 << 16);
    961                 word3 = word12 | word22;
    962                 word12 &= mask;
    963                 word22 &= mask;
    964                 word3 &= (~mask); /* 0x1010101, check last bit */
    965                 word12 >>= 1;
    966                 word12 = word12 + (word22 >> 1);
    967                 word12 += word3;
    968                 *((ULong*)(rec += 12)) = word12;
    969 
    970                 word12 = *((ULong*)(prev += 4)); /* read b12 b11 b10 b9 */
    971                 word22 = *((ULong*)(prev + lx));
    972                 word1 >>= 16; /* 0 0 b8 b7 */
    973                 word2 >>= 16;
    974                 word1 = word1 | (word12 << 16); /* b10 b9 b8 b7 */
    975                 word2 = word2 | (word22 << 16);
    976                 word3 = word1 | word2;
    977                 word1 &= mask;
    978                 word2 &= mask;
    979                 word3 &= (~mask); /* 0x1010101, check last bit */
    980                 word1 >>= 1;
    981                 word1 = word1 + (word2 >> 1);
    982                 word1 += word3;
    983                 *((ULong*)(rec += 4)) = word1;
    984                 prev += offset;
    985             }
    986             return 1;
    987         }
    988         else /* rnd1 = 0 */
    989         {
    990             for (i = B_SIZE; i > 0; i--)
    991             {
    992                 word12 = *((ULong*)prev); /* read b4 b3 b2 b1 */
    993                 word22 = *((ULong*)(prev + lx));
    994 
    995                 word1 = *((ULong*)(prev += 4)); /* read b8 b7 b6 b5 */
    996                 word2 = *((ULong*)(prev + lx));
    997                 word12 >>= 16; /* 0 0 b4 b3 */
    998                 word22 >>= 16;
    999                 word12 = word12 | (word1 << 16); /* b6 b5 b4 b3 */
   1000                 word22 = word22 | (word2 << 16);
   1001                 word3 = word12 & word22;
   1002                 word12 &= mask;
   1003                 word22 &= mask;
   1004                 word3 &= (~mask); /* 0x1010101, check last bit */
   1005                 word12 >>= 1;
   1006                 word12 = word12 + (word22 >> 1);
   1007                 word12 += word3;
   1008                 *((ULong*)(rec += 12)) = word12;
   1009 
   1010                 word12 = *((ULong*)(prev += 4)); /* read b12 b11 b10 b9 */
   1011                 word22 = *((ULong*)(prev + lx));
   1012                 word1 >>= 16; /* 0 0 b8 b7 */
   1013                 word2 >>= 16;
   1014                 word1 = word1 | (word12 << 16); /* b10 b9 b8 b7 */
   1015                 word2 = word2 | (word22 << 16);
   1016                 word3 = word1 & word2;
   1017                 word1 &= mask;
   1018                 word2 &= mask;
   1019                 word3 &= (~mask); /* 0x1010101, check last bit */
   1020                 word1 >>= 1;
   1021                 word1 = word1 + (word2 >> 1);
   1022                 word1 += word3;
   1023                 *((ULong*)(rec += 4)) = word1;
   1024                 prev += offset;
   1025             }
   1026 
   1027             return 1;
   1028         }
   1029     }
   1030     else /* tmp == 3 */
   1031     {
   1032         prev -= 3; /* word-aligned */
   1033         if (rnd1 == 1)
   1034         {
   1035             for (i = B_SIZE; i > 0; i--)
   1036             {
   1037                 word12 = *((ULong*)prev); /* read b4 b3 b2 b1 */
   1038                 word22 = *((ULong*)(prev + lx));
   1039 
   1040                 word1 = *((ULong*)(prev += 4)); /* read b8 b7 b6 b5 */
   1041                 word2 = *((ULong*)(prev + lx));
   1042                 word12 >>= 24; /* 0 0 0 b4 */
   1043                 word22 >>= 24;
   1044                 word12 = word12 | (word1 << 8); /* b7 b6 b5 b4 */
   1045                 word22 = word22 | (word2 << 8);
   1046                 word3 = word12 | word22;
   1047                 word12 &= mask;
   1048                 word22 &= mask;
   1049                 word3 &= (~mask); /* 0x1010101, check last bit */
   1050                 word12 >>= 1;
   1051                 word12 = word12 + (word22 >> 1);
   1052                 word12 += word3;
   1053                 *((ULong*)(rec += 12)) = word12;
   1054 
   1055                 word12 = *((ULong*)(prev += 4)); /* read b12 b11 b10 b9 */
   1056                 word22 = *((ULong*)(prev + lx));
   1057                 word1 >>= 24; /* 0 0 0 b8 */
   1058                 word2 >>= 24;
   1059                 word1 = word1 | (word12 << 8); /* b11 b10 b9 b8 */
   1060                 word2 = word2 | (word22 << 8);
   1061                 word3 = word1 | word2;
   1062                 word1 &= mask;
   1063                 word2 &= mask;
   1064                 word3 &= (~mask); /* 0x1010101, check last bit */
   1065                 word1 >>= 1;
   1066                 word1 = word1 + (word2 >> 1);
   1067                 word1 += word3;
   1068                 *((ULong*)(rec += 4)) = word1;
   1069                 prev += offset;
   1070             }
   1071             return 1;
   1072         }
   1073         else /* rnd1 = 0 */
   1074         {
   1075             for (i = B_SIZE; i > 0; i--)
   1076             {
   1077                 word12 = *((ULong*)prev); /* read b4 b3 b2 b1 */
   1078                 word22 = *((ULong*)(prev + lx));
   1079 
   1080                 word1 = *((ULong*)(prev += 4)); /* read b8 b7 b6 b5 */
   1081                 word2 = *((ULong*)(prev + lx));
   1082                 word12 >>= 24; /* 0 0 0 b4 */
   1083                 word22 >>= 24;
   1084                 word12 = word12 | (word1 << 8); /* b7 b6 b5 b4 */
   1085                 word22 = word22 | (word2 << 8);
   1086                 word3 = word12 & word22;
   1087                 word12 &= mask;
   1088                 word22 &= mask;
   1089                 word3 &= (~mask); /* 0x1010101, check last bit */
   1090                 word12 >>= 1;
   1091                 word12 = word12 + (word22 >> 1);
   1092                 word12 += word3;
   1093                 *((ULong*)(rec += 12)) = word12;
   1094 
   1095                 word12 = *((ULong*)(prev += 4)); /* read b12 b11 b10 b9 */
   1096                 word22 = *((ULong*)(prev + lx));
   1097                 word1 >>= 24; /* 0 0 0 b8 */
   1098                 word2 >>= 24;
   1099                 word1 = word1 | (word12 << 8); /* b11 b10 b9 b8 */
   1100                 word2 = word2 | (word22 << 8);
   1101                 word3 = word1 & word2;
   1102                 word1 &= mask;
   1103                 word2 &= mask;
   1104                 word3 &= (~mask); /* 0x1010101, check last bit */
   1105                 word1 >>= 1;
   1106                 word1 = word1 + (word2 >> 1);
   1107                 word1 += word3;
   1108                 *((ULong*)(rec += 4)) = word1;
   1109                 prev += offset;
   1110             }
   1111             return 1;
   1112         } /* rnd */
   1113     } /* tmp */
   1114 }
   1115 
   1116 /**********************************************************************************/
   1117 Int GetPredAdvBy1x1(
   1118     UChar *prev,        /* i */
   1119     UChar *rec,     /* i */
   1120     Int lx,     /* i */
   1121     Int rnd1 /* i */
   1122 )
   1123 {
   1124     Int i;      /* loop variable */
   1125     Int offset;
   1126     ULong  x1, x2, x1m, x2m, y1, y2, y1m, y2m; /* new way */
   1127     Int tmp;
   1128     Int rnd2;
   1129     ULong mask;
   1130 
   1131     /* initialize offset to adjust pixel counter */
   1132     /*    the next row; full-pel resolution      */
   1133     offset = lx - B_SIZE; /* offset for prev */
   1134 
   1135     rnd2 = rnd1 + 1;
   1136     rnd2 |= (rnd2 << 8);
   1137     rnd2 |= (rnd2 << 16);
   1138 
   1139     mask = 0x3F;
   1140     mask |= (mask << 8);
   1141     mask |= (mask << 16); /* 0x3f3f3f3f */
   1142 
   1143     tmp = (ULong)prev & 3;
   1144 
   1145     rec -= 4; /* preset */
   1146 
   1147     if (tmp == 0) /* word-aligned */
   1148     {
   1149         for (i = B_SIZE; i > 0; i--)
   1150         {
   1151             x1 = *((ULong*)prev); /* load a3 a2 a1 a0 */
   1152             x2 = *((ULong*)(prev + lx)); /* load b3 b2 b1 b0, another line */
   1153             y1 = *((ULong*)(prev += 4)); /* a7 a6 a5 a4 */
   1154             y2 = *((ULong*)(prev + lx)); /* b7 b6 b5 b4 */
   1155 
   1156             x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
   1157             x2m = (x2 >> 2) & mask;
   1158             x1 = x1 ^(x1m << 2);
   1159             x2 = x2 ^(x2m << 2);
   1160             x1m += x2m;
   1161             x1 += x2;
   1162 
   1163             /* x2m, x2 free */
   1164             y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
   1165             y2m = (y2 >> 2) & mask;
   1166             y1 = y1 ^(y1m << 2);
   1167             y2 = y2 ^(y2m << 2);
   1168             y1m += y2m;
   1169             y1 += y2;
   1170 
   1171             /* y2m, y2 free */
   1172             /* x2m, x2 free */
   1173             x2 = *((ULong*)(prev += 4)); /* a11 a10 a9 a8 */
   1174             y2 = *((ULong*)(prev + lx)); /* b11 b10 b9 b8 */
   1175             x2m = (x2 >> 2) & mask;
   1176             y2m = (y2 >> 2) & mask;
   1177             x2 = x2 ^(x2m << 2);
   1178             y2 = y2 ^(y2m << 2);
   1179             x2m += y2m;
   1180             x2 += y2;
   1181             /* y2m, y2 free */
   1182 
   1183             /* now operate on x1m, x1, y1m, y1, x2m, x2 */
   1184             /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
   1185             /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
   1186             /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
   1187             /* x1, y1, x2 */
   1188 
   1189             y2m = x1m >> 8;
   1190             y2 = x1 >> 8;
   1191             y2m |= (y1m << 24);  /* a4+b4, a3+b3, a2+b2, a1+b1 */
   1192             y2 |= (y1 << 24);
   1193             x1m += y2m;  /* a3+b3+a4+b4, ....., a0+b0+a1+b1 */
   1194             x1 += y2;
   1195             x1 += rnd2;
   1196             x1 &= (mask << 2);
   1197             x1m += (x1 >> 2);
   1198             *((ULong*)(rec += 4)) = x1m; /* save x1m */
   1199 
   1200             y2m = y1m >> 8;
   1201             y2 = y1 >> 8;
   1202             y2m |= (x2m << 24); /* a8+b8, a7+b7, a6+b6, a5+b5 */
   1203             y2 |= (x2 << 24);
   1204             y1m += y2m;  /* a7+b7+a8+b8, ....., a4+b4+a5+b5 */
   1205             y1 += y2;
   1206             y1 += rnd2;
   1207             y1 &= (mask << 2);
   1208             y1m += (y1 >> 2);
   1209             *((ULong*)(rec += 4)) = y1m; /* save y1m */
   1210 
   1211             rec += 8;
   1212             prev += offset;
   1213         }
   1214 
   1215         return 1;
   1216     }
   1217     else if (tmp == 1)
   1218     {
   1219         prev--; /* to word-aligned */
   1220         for (i = B_SIZE; i > 0; i--)
   1221         {
   1222             x1 = *((ULong*)prev); /* load a3 a2 a1 a0 */
   1223             x2 = *((ULong*)(prev + lx)); /* load b3 b2 b1 b0, another line */
   1224             y1 = *((ULong*)(prev += 4)); /* a7 a6 a5 a4 */
   1225             y2 = *((ULong*)(prev + lx)); /* b7 b6 b5 b4 */
   1226 
   1227             x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
   1228             x2m = (x2 >> 2) & mask;
   1229             x1 = x1 ^(x1m << 2);
   1230             x2 = x2 ^(x2m << 2);
   1231             x1m += x2m;
   1232             x1 += x2;
   1233 
   1234             /* x2m, x2 free */
   1235             y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
   1236             y2m = (y2 >> 2) & mask;
   1237             y1 = y1 ^(y1m << 2);
   1238             y2 = y2 ^(y2m << 2);
   1239             y1m += y2m;
   1240             y1 += y2;
   1241 
   1242             /* y2m, y2 free */
   1243             /* x2m, x2 free */
   1244             x2 = *((ULong*)(prev += 4)); /* a11 a10 a9 a8 */
   1245             y2 = *((ULong*)(prev + lx)); /* b11 b10 b9 b8 */
   1246             x2m = (x2 >> 2) & mask;
   1247             y2m = (y2 >> 2) & mask;
   1248             x2 = x2 ^(x2m << 2);
   1249             y2 = y2 ^(y2m << 2);
   1250             x2m += y2m;
   1251             x2 += y2;
   1252             /* y2m, y2 free */
   1253 
   1254             /* now operate on x1m, x1, y1m, y1, x2m, x2 */
   1255             /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
   1256             /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
   1257             /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
   1258             /* x1, y1, x2 */
   1259 
   1260             x1m >>= 8 ;
   1261             x1 >>= 8;
   1262             x1m |= (y1m << 24);  /* a4+b4, a3+b3, a2+b2, a1+b1 */
   1263             x1 |= (y1 << 24);
   1264             y2m = (y1m << 16);
   1265             y2 = (y1 << 16);
   1266             y2m |= (x1m >> 8); /* a5+b5, a4+b4, a3+b3, a2+b2 */
   1267             y2 |= (x1 >> 8);
   1268             x1 += rnd2;
   1269             x1m += y2m;  /* a4+b4+a5+b5, ....., a1+b1+a2+b2 */
   1270             x1 += y2;
   1271             x1 &= (mask << 2);
   1272             x1m += (x1 >> 2);
   1273             *((ULong*)(rec += 4)) = x1m; /* save x1m */
   1274 
   1275             y1m >>= 8;
   1276             y1 >>= 8;
   1277             y1m |= (x2m << 24); /* a8+b8, a7+b7, a6+b6, a5+b5 */
   1278             y1 |= (x2 << 24);
   1279             y2m = (x2m << 16);
   1280             y2 = (x2 << 16);
   1281             y2m |= (y1m >> 8); /*  a9+b9, a8+b8, a7+b7, a6+b6,*/
   1282             y2 |= (y1 >> 8);
   1283             y1 += rnd2;
   1284             y1m += y2m;  /* a8+b8+a9+b9, ....., a5+b5+a6+b6 */
   1285             y1 += y2;
   1286             y1 &= (mask << 2);
   1287             y1m += (y1 >> 2);
   1288             *((ULong*)(rec += 4)) = y1m; /* save y1m */
   1289 
   1290             rec += 8;
   1291             prev += offset;
   1292         }
   1293         return 1;
   1294     }
   1295     else if (tmp == 2)
   1296     {
   1297         prev -= 2; /* to word-aligned */
   1298         for (i = B_SIZE; i > 0; i--)
   1299         {
   1300             x1 = *((ULong*)prev); /* load a3 a2 a1 a0 */
   1301             x2 = *((ULong*)(prev + lx)); /* load b3 b2 b1 b0, another line */
   1302             y1 = *((ULong*)(prev += 4)); /* a7 a6 a5 a4 */
   1303             y2 = *((ULong*)(prev + lx)); /* b7 b6 b5 b4 */
   1304 
   1305             x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
   1306             x2m = (x2 >> 2) & mask;
   1307             x1 = x1 ^(x1m << 2);
   1308             x2 = x2 ^(x2m << 2);
   1309             x1m += x2m;
   1310             x1 += x2;
   1311 
   1312             /* x2m, x2 free */
   1313             y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
   1314             y2m = (y2 >> 2) & mask;
   1315             y1 = y1 ^(y1m << 2);
   1316             y2 = y2 ^(y2m << 2);
   1317             y1m += y2m;
   1318             y1 += y2;
   1319 
   1320             /* y2m, y2 free */
   1321             /* x2m, x2 free */
   1322             x2 = *((ULong*)(prev += 4)); /* a11 a10 a9 a8 */
   1323             y2 = *((ULong*)(prev + lx)); /* b11 b10 b9 b8 */
   1324             x2m = (x2 >> 2) & mask;
   1325             y2m = (y2 >> 2) & mask;
   1326             x2 = x2 ^(x2m << 2);
   1327             y2 = y2 ^(y2m << 2);
   1328             x2m += y2m;
   1329             x2 += y2;
   1330             /* y2m, y2 free */
   1331 
   1332             /* now operate on x1m, x1, y1m, y1, x2m, x2 */
   1333             /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
   1334             /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
   1335             /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
   1336             /* x1, y1, x2 */
   1337 
   1338             x1m >>= 16 ;
   1339             x1 >>= 16;
   1340             x1m |= (y1m << 16);  /* a5+b5, a4+b4, a3+b3, a2+b2 */
   1341             x1 |= (y1 << 16);
   1342             y2m = (y1m << 8);
   1343             y2 = (y1 << 8);
   1344             y2m |= (x1m >> 8); /* a6+b6, a5+b5, a4+b4, a3+b3 */
   1345             y2 |= (x1 >> 8);
   1346             x1 += rnd2;
   1347             x1m += y2m;  /* a5+b5+a6+b6, ....., a2+b2+a3+b3 */
   1348             x1 += y2;
   1349             x1 &= (mask << 2);
   1350             x1m += (x1 >> 2);
   1351             *((ULong*)(rec += 4)) = x1m; /* save x1m */
   1352 
   1353             y1m >>= 16;
   1354             y1 >>= 16;
   1355             y1m |= (x2m << 16); /* a9+b9, a8+b8, a7+b7, a6+b6 */
   1356             y1 |= (x2 << 16);
   1357             y2m = (x2m << 8);
   1358             y2 = (x2 << 8);
   1359             y2m |= (y1m >> 8); /*  a10+b10, a9+b9, a8+b8, a7+b7,*/
   1360             y2 |= (y1 >> 8);
   1361             y1 += rnd2;
   1362             y1m += y2m;  /* a9+b9+a10+b10, ....., a6+b6+a7+b7 */
   1363             y1 += y2;
   1364             y1 &= (mask << 2);
   1365             y1m += (y1 >> 2);
   1366             *((ULong*)(rec += 4)) = y1m; /* save y1m */
   1367 
   1368             rec += 8;
   1369             prev += offset;
   1370         }
   1371         return 1;
   1372     }
   1373     else /* tmp == 3 */
   1374     {
   1375         prev -= 3; /* to word-aligned */
   1376         for (i = B_SIZE; i > 0; i--)
   1377         {
   1378             x1 = *((ULong*)prev); /* load a3 a2 a1 a0 */
   1379             x2 = *((ULong*)(prev + lx)); /* load b3 b2 b1 b0, another line */
   1380             y1 = *((ULong*)(prev += 4)); /* a7 a6 a5 a4 */
   1381             y2 = *((ULong*)(prev + lx)); /* b7 b6 b5 b4 */
   1382 
   1383             x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
   1384             x2m = (x2 >> 2) & mask;
   1385             x1 = x1 ^(x1m << 2);
   1386             x2 = x2 ^(x2m << 2);
   1387             x1m += x2m;
   1388             x1 += x2;
   1389 
   1390             /* x2m, x2 free */
   1391             y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
   1392             y2m = (y2 >> 2) & mask;
   1393             y1 = y1 ^(y1m << 2);
   1394             y2 = y2 ^(y2m << 2);
   1395             y1m += y2m;
   1396             y1 += y2;
   1397 
   1398             /* y2m, y2 free */
   1399             /* x2m, x2 free */
   1400             x2 = *((ULong*)(prev += 4)); /* a11 a10 a9 a8 */
   1401             y2 = *((ULong*)(prev + lx)); /* b11 b10 b9 b8 */
   1402             x2m = (x2 >> 2) & mask;
   1403             y2m = (y2 >> 2) & mask;
   1404             x2 = x2 ^(x2m << 2);
   1405             y2 = y2 ^(y2m << 2);
   1406             x2m += y2m;
   1407             x2 += y2;
   1408             /* y2m, y2 free */
   1409 
   1410             /* now operate on x1m, x1, y1m, y1, x2m, x2 */
   1411             /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
   1412             /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
   1413             /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
   1414             /* x1, y1, x2 */
   1415 
   1416             x1m >>= 24 ;
   1417             x1 >>= 24;
   1418             x1m |= (y1m << 8);  /* a6+b6, a5+b5, a4+b4, a3+b3 */
   1419             x1 |= (y1 << 8);
   1420 
   1421             x1m += y1m;  /* a6+b6+a7+b7, ....., a3+b3+a4+b4 */
   1422             x1 += y1;
   1423             x1 += rnd2;
   1424             x1 &= (mask << 2);
   1425             x1m += (x1 >> 2);
   1426             *((ULong*)(rec += 4)) = x1m; /* save x1m */
   1427 
   1428             y1m >>= 24;
   1429             y1 >>= 24;
   1430             y1m |= (x2m << 8); /* a10+b10, a9+b9, a8+b8, a7+b7 */
   1431             y1 |= (x2 << 8);
   1432             y1m += x2m;  /* a10+b10+a11+b11, ....., a7+b7+a8+b8 */
   1433             y1 += x2;
   1434             y1 += rnd2;
   1435             y1 &= (mask << 2);
   1436             y1m += (y1 >> 2);
   1437             *((ULong*)(rec += 4)) = y1m; /* save y1m */
   1438 
   1439             rec += 8;
   1440             prev += offset;
   1441         }
   1442         return 1;
   1443     }
   1444 }
   1445 
   1446 
   1447 /*=============================================================================
   1448     Function:   EncGetPredOutside
   1449     Date:       04/17/2001
   1450     Purpose:    - modified from GetPredOutside in the decoder.
   1451     Modified:    09/24/05
   1452                 use the existing non-initialized padded region
   1453 =============================================================================*/
   1454 // not really needed since padding is included
   /* Replicates the single pixel at src into an 8x8 area starting at dst.  */
   /* Uses temp, src, dst and lx from the expansion site; dst is left       */
   /* pointing at the eighth (last) row that was written.                   */
   #define PAD_CORNER  { int pad_rows_left; \
                         temp = *src; \
                         temp |= (temp<<8); \
                         temp |= (temp<<16); \
                         *((ULong*)dst) = temp; \
                         *((ULong*)(dst+4)) = temp; \
                         for (pad_rows_left = 7; pad_rows_left > 0; pad_rows_left--) \
                         { \
                             *((ULong*)(dst+=lx)) = temp; \
                             *((ULong*)(dst+4)) = temp; \
                         } }
   1474 
   /* Copies the eight pixels at src into eight consecutive rows starting   */
   /* at dst.  Uses temp, temp2, src, dst and lx from the expansion site;   */
   /* dst is left pointing at the eighth (last) row that was written.       */
   #define PAD_ROW     { int pad_rows_left; \
                         temp = *((ULong*)src); \
                         temp2 = *((ULong*)(src+4)); \
                         *((ULong*)dst) = temp; \
                         *((ULong*)(dst+4)) = temp2; \
                         for (pad_rows_left = 7; pad_rows_left > 0; pad_rows_left--) \
                         { \
                             *((ULong*)(dst+=lx)) = temp; \
                             *((ULong*)(dst+4)) = temp2; \
                         } }
   1493 
   /* PAD_COL: for 8 consecutive rows (row stride lx) replicate the byte at
      *src across two ULong words and store them at dst and dst+4.
      Works on the caller's src, dst, lx and temp.  Both src and dst are
      advanced and finish on the last row processed (start + 7*lx).
      NOTE(review): the byte replication assumes ULong is 32 bits wide. */
   #define PAD_COL     { int pad_n; \
                         temp = *src; \
                         temp |= (temp << 8); \
                         temp |= (temp << 16); \
                         *((ULong*)dst) = temp; \
                         *((ULong*)(dst + 4)) = temp; \
                         for (pad_n = 7; pad_n > 0; pad_n--) \
                         { \
                             src += lx; \
                             temp = *src; \
                             temp |= (temp << 8); \
                             temp |= (temp << 16); \
                             dst += lx; \
                             *((ULong*)dst) = temp; \
                             *((ULong*)(dst + 4)) = temp; \
                         } }
   1518 
   1519 
   1520 Int EncGetPredOutside(Int xpos, Int ypos, UChar *c_prev, UChar *rec,
   1521                       Int width, Int height, Int rnd1)
   1522 {
   1523     Int lx;
   1524     UChar *src, *dst;
   1525     ULong temp, temp2;
   1526     Int xoffset;
   1527 
   1528     lx = width + 16; /* only works for chroma */
   1529 
   1530     if (xpos < 0)
   1531     {
   1532         if (ypos < 0) /* pad top-left */
   1533         {
   1534             /* pad corner */
   1535             src = c_prev;
   1536             dst = c_prev - (lx << 3) - 8;
   1537             PAD_CORNER
   1538 
   1539             /* pad top */
   1540             dst = c_prev - (lx << 3);
   1541             PAD_ROW
   1542 
   1543             /* pad left */
   1544             dst = c_prev - 8;
   1545             PAD_COL
   1546 
   1547             GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
   1548                                              rec, lx, rnd1);
   1549 
   1550             return 1;
   1551         }
   1552         else if ((ypos >> 1) < (height - 8)) /* pad left of frame */
   1553         {
   1554             /* pad left */
   1555             src = c_prev + (ypos >> 1) * lx;
   1556             dst = src - 8;
   1557             PAD_COL
   1558             /* pad extra row */
   1559             temp = *(src += lx);
   1560             temp |= (temp << 8);
   1561             temp |= (temp << 16);
   1562             *((ULong*)(dst += lx)) = temp;
   1563             *((ULong*)(dst + 4)) = temp;
   1564 
   1565             GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
   1566                                              rec, lx, rnd1);
   1567 
   1568             return 1;
   1569         }
   1570         else /* pad bottom-left */
   1571         {
   1572             /* pad corner */
   1573             src = c_prev + (height - 1) * lx;
   1574             dst = src + lx - 8;
   1575             PAD_CORNER
   1576 
   1577             /* pad bottom */
   1578             dst = src + lx;
   1579             PAD_ROW
   1580 
   1581             /* pad left */
   1582             src -= (lx << 3);
   1583             src += lx;
   1584             dst = src - 8;
   1585             PAD_COL
   1586 
   1587             GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
   1588                                              rec, lx, rnd1);
   1589 
   1590             return 1;
   1591         }
   1592     }
   1593     else if ((xpos >> 1) < (width - 8))
   1594     {
   1595         if (ypos < 0) /* pad top of frame */
   1596         {
   1597             xoffset = (xpos >> 1) & 0x3;
   1598             src = c_prev + (xpos >> 1) - xoffset;
   1599             dst = src - (lx << 3);
   1600             PAD_ROW
   1601             if (xoffset || (xpos&1))
   1602             {
   1603                 temp = *((ULong*)(src + 8));
   1604                 dst = src - (lx << 3) + 8;
   1605                 *((ULong*)dst) = temp;
   1606                 *((ULong*)(dst += lx)) = temp;
   1607                 *((ULong*)(dst += lx)) = temp;
   1608                 *((ULong*)(dst += lx)) = temp;
   1609                 *((ULong*)(dst += lx)) = temp;
   1610                 *((ULong*)(dst += lx)) = temp;
   1611                 *((ULong*)(dst += lx)) = temp;
   1612                 *((ULong*)(dst += lx)) = temp;
   1613             }
   1614 
   1615             GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
   1616                                              rec, lx, rnd1);
   1617 
   1618             return 1;
   1619         }
   1620         else /* pad bottom of frame */
   1621         {
   1622             xoffset = (xpos >> 1) & 0x3;
   1623             src = c_prev + (xpos >> 1) - xoffset + (height - 1) * lx;
   1624             dst = src + lx;
   1625             PAD_ROW
   1626             if (xoffset || (xpos&1))
   1627             {
   1628                 temp = *((ULong*)(src + 8));
   1629                 dst = src + lx + 8;
   1630                 *((ULong*)dst) = temp;
   1631                 *((ULong*)(dst += lx)) = temp;
   1632                 *((ULong*)(dst += lx)) = temp;
   1633                 *((ULong*)(dst += lx)) = temp;
   1634                 *((ULong*)(dst += lx)) = temp;
   1635                 *((ULong*)(dst += lx)) = temp;
   1636                 *((ULong*)(dst += lx)) = temp;
   1637                 *((ULong*)(dst += lx)) = temp;
   1638             }
   1639 
   1640             GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
   1641                                              rec, lx, rnd1);
   1642 
   1643             return 1;
   1644         }
   1645     }
   1646     else
   1647     {
   1648         if (ypos < 0) /* pad top-right */
   1649         {
   1650             /* pad corner */
   1651             src = c_prev + width - 1;
   1652             dst = src - (lx << 3) + 1;
   1653             PAD_CORNER
   1654 
   1655             /* pad top */
   1656             src -= 7;
   1657             dst = src - (lx << 3);
   1658             PAD_ROW
   1659 
   1660             /* pad left */
   1661             src += 7;
   1662             dst = src + 1;
   1663             PAD_COL
   1664 
   1665             GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
   1666                                              rec, lx, rnd1);
   1667 
   1668             return 1;
   1669         }
   1670         else if ((ypos >> 1) < (height - B_SIZE)) /* pad right of frame */
   1671         {
   1672             /* pad left */
   1673             src = c_prev + (ypos >> 1) * lx + width - 1;
   1674             dst = src + 1;
   1675             PAD_COL
   1676             /* pad extra row */
   1677             temp = *(src += lx);
   1678             temp |= (temp << 8);
   1679             temp |= (temp << 16);
   1680             *((ULong*)(dst += lx)) = temp;
   1681             *((ULong*)(dst + 4)) = temp;
   1682 
   1683             GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
   1684                                              rec, lx, rnd1);
   1685 
   1686             return 1;
   1687         }
   1688         else /* pad bottom-right */
   1689         {
   1690             /* pad left */
   1691             src = c_prev + (height - 8) * lx + width - 1;
   1692             dst = src + 1;
   1693             PAD_COL
   1694 
   1695             /* pad corner */
   1696             dst = src + lx + 1;
   1697             PAD_CORNER
   1698 
   1699             /* pad bottom */
   1700             src -= 7;
   1701             dst = src + lx;
   1702             PAD_ROW
   1703 
   1704             GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
   1705                                              rec, lx, rnd1);
   1706 
   1707             return 1;
   1708         }
   1709     }
   1710 }
   1711 
   1712 /* ====================================================================== /
   1713     Function : Copy_MB_from_Vop()
   1714     Date     : 04/17/2001
   1715  ====================================================================== */
   1716 
   1717 void Copy_MB_from_Vop(UChar *comp, Int yChan[][NCOEFF_BLOCK], Int pitch)
   1718 {
   1719     Int row, col, i;
   1720     Int *src1, *src2;
   1721     Int offset = pitch - MB_SIZE;
   1722     ULong temp;
   1723 
   1724     for (i = 0; i < 4; i += 2)
   1725     {
   1726         src1 = yChan[i];
   1727         src2 = yChan[i+1];
   1728 
   1729         row = B_SIZE;
   1730         while (row--)
   1731         {
   1732             col = B_SIZE;
   1733             while (col)
   1734             {
   1735                 temp = *((ULong*)comp);
   1736                 *src1++ = (Int)(temp & 0xFF);
   1737                 *src1++ = (Int)((temp >> 8) & 0xFF);
   1738                 *src1++ = (Int)((temp >> 16) & 0xFF);
   1739                 *src1++ = (Int)((temp >> 24) & 0xFF);
   1740                 comp += 4;
   1741                 col -= 4;
   1742             }
   1743             col = B_SIZE;
   1744             while (col)
   1745             {
   1746                 temp = *((ULong*)comp);
   1747                 *src2++ = (Int)(temp & 0xFF);
   1748                 *src2++ = (Int)((temp >> 8) & 0xFF);
   1749                 *src2++ = (Int)((temp >> 16) & 0xFF);
   1750                 *src2++ = (Int)((temp >> 24) & 0xFF);
   1751                 comp += 4;
   1752                 col -= 4;
   1753             }
   1754             comp += offset;
   1755         }
   1756     }
   1757     return ;
   1758 }
   1759 
   1760 /* ====================================================================== /
   1761     Function : Copy_B_from_Vop()
   1762     Date     : 04/17/2001
   1763 / ====================================================================== */
   1764 
   1765 void Copy_B_from_Vop(UChar *comp, Int cChan[], Int pitch)
   1766 {
   1767     Int row, col;
   1768     Int offset = pitch - B_SIZE;
   1769     ULong temp;
   1770 
   1771     row = B_SIZE;
   1772     while (row--)
   1773     {
   1774         col = B_SIZE;
   1775         while (col)
   1776         {
   1777             temp = *((ULong*)comp);
   1778             *cChan++ = (Int)(temp & 0xFF);
   1779             *cChan++ = (Int)((temp >> 8) & 0xFF);
   1780             *cChan++ = (Int)((temp >> 16) & 0xFF);
   1781             *cChan++ = (Int)((temp >> 24) & 0xFF);
   1782             comp += 4;
   1783             col -= 4;
   1784         }
   1785         comp += offset;
   1786     }
   1787 }
   1788 
   1789 /* ====================================================================== /
   1790     Function : Copy_MB_into_Vop()
   1791     Date     : 04/17/2001
   1792     History  : From decoder
   1793 / ====================================================================== */
   1794 
   1795 void Copy_MB_into_Vop(UChar *comp, Int yChan[][NCOEFF_BLOCK], Int pitch)
   1796 {
   1797     Int row, col, i;
   1798     Int *src1, *src2;
   1799     Int offset = pitch - MB_SIZE;
   1800     UChar mask = 0xFF;
   1801     Int tmp;
   1802     ULong temp;
   1803 
   1804     for (i = 0; i < 4; i += 2)
   1805     {
   1806         src1 = yChan[i];
   1807         src2 = yChan[i+1];
   1808 
   1809         row = B_SIZE;
   1810         while (row--)
   1811         {
   1812             col = B_SIZE;
   1813             while (col)
   1814             {
   1815                 tmp = (*src1++);
   1816                 if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
   1817                 temp = tmp << 24;
   1818                 tmp = (*src1++);
   1819                 if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
   1820                 temp |= (tmp << 16);
   1821                 tmp = (*src1++);
   1822                 if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
   1823                 temp |= (tmp << 8);
   1824                 tmp = (*src1++);
   1825                 if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
   1826                 temp |= tmp;
   1827                 *((ULong*)comp) = temp;
   1828                 comp += 4;
   1829                 col -= 4;
   1830             }
   1831             col = B_SIZE;
   1832             while (col)
   1833             {
   1834                 tmp = (*src2++);
   1835                 if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
   1836                 temp = tmp << 24;
   1837                 tmp = (*src2++);
   1838                 if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
   1839                 temp |= (tmp << 16);
   1840                 tmp = (*src2++);
   1841                 if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
   1842                 temp |= (tmp << 8);
   1843                 tmp = (*src2++);
   1844                 if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
   1845                 temp |= tmp;
   1846                 *((ULong*)comp) = temp;
   1847                 comp += 4;
   1848                 col -= 4;
   1849             }
   1850             comp += offset;
   1851         }
   1852     }
   1853     return ;
   1854 }
   1855 
   1856 
   1857 /* ====================================================================== /
   1858     Function : Copy_B_into_Vop()
   1859     Date     : 04/17/2001
   1860     History  : From decoder
   1861 / ====================================================================== */
   1862 
   1863 void Copy_B_into_Vop(UChar *comp, Int cChan[], Int pitch)
   1864 {
   1865     Int row, col;
   1866     Int offset = pitch - B_SIZE;
   1867     Int tmp;
   1868     UChar mask = 0xFF;
   1869     ULong temp;
   1870 
   1871     row = B_SIZE;
   1872     while (row--)
   1873     {
   1874         col = B_SIZE;
   1875         while (col)
   1876         {
   1877             tmp = (*cChan++);
   1878             if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
   1879             temp = tmp << 24;
   1880             tmp = (*cChan++);
   1881             if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
   1882             temp |= (tmp << 16);
   1883             tmp = (*cChan++);
   1884             if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
   1885             temp |= (tmp << 8);
   1886             tmp = (*cChan++);
   1887             if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
   1888             temp |= tmp;
   1889             *((ULong*)comp) = temp;
   1890             comp += 4;
   1891             col -= 4;
   1892         }
   1893         comp += offset;
   1894     }
   1895 }
   1896 
   1897 /* ======================================================================== */
   1898 /*  Function : get_MB( )                                                    */
   1899 /*  Date     : 10/03/2000                                                   */
   1900 /*  Purpose  : Copy 4 Y to reference frame                                  */
   1901 /*  In/out   :                                                              */
   1902 /*  Return   :                                                              */
   1903 /*  Modified :                                                              */
   1904 /* ======================================================================== */
   1905 void get_MB(UChar *c_prev, UChar *c_prev_u  , UChar *c_prev_v,
   1906             Short mb[6][64], Int lx, Int lx_uv)
   1907 
   1908 {
   1909     Int i, j, count = 0, count1 = 0;
   1910     Int k1 = lx - MB_SIZE, k2 = lx_uv - B_SIZE;
   1911 
   1912     for (i = 0; i < B_SIZE; i++)
   1913     {
   1914         for (j = 0; j < B_SIZE; j++)
   1915         {
   1916             mb[0][count] = (Int)(*c_prev++);
   1917             mb[4][count] = (Int)(*c_prev_u++);
   1918             mb[5][count++] = (Int)(*c_prev_v++);
   1919         }
   1920 
   1921         for (j = 0; j < B_SIZE; j++)
   1922             mb[1][count1++] = (Int)(*c_prev++);
   1923 
   1924         c_prev += k1;
   1925         c_prev_u += k2;
   1926         c_prev_v += k2;
   1927 
   1928 
   1929     }
   1930 
   1931     count = count1 = 0;
   1932     for (i = 0; i < B_SIZE; i++)
   1933     {
   1934         for (j = 0; j < B_SIZE; j++)
   1935             mb[2][count++] = (Int)(*c_prev++);
   1936 
   1937         for (j = 0; j < B_SIZE; j++)
   1938             mb[3][count1++] = (Int)(*c_prev++);
   1939 
   1940         c_prev += k1;
   1941     }
   1942 }
   1943 
   1944 void PutSkippedBlock(UChar *rec, UChar *prev, Int lx)
   1945 {
   1946     UChar *end;
   1947     Int offset = (lx - 8) >> 2;
   1948     Int *src, *dst;
   1949 
   1950     dst = (Int*)rec;
   1951     src = (Int*)prev;
   1952 
   1953     end = prev + (lx << 3);
   1954 
   1955     do
   1956     {
   1957         *dst++ = *src++;
   1958         *dst++ = *src++;
   1959         dst += offset;
   1960         src += offset;
   1961     }
   1962     while ((UInt)src < (UInt)end);
   1963 
   1964     return ;
   1965 }
   1966