Home | History | Annotate | Download | only in src
      1 /* ------------------------------------------------------------------
      2  * Copyright (C) 1998-2009 PacketVideo
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
     13  * express or implied.
     14  * See the License for the specific language governing permissions
     15  * and limitations under the License.
     16  * -------------------------------------------------------------------
     17  */
     18 /*
     19 ------------------------------------------------------------------------------
     20  INPUT AND OUTPUT DEFINITIONS
     21 
     22  Inputs:
     23     xpos = x half-pixel of (x,y) coordinates within a VOP; motion
     24            compensated coordinates; native type
     25     ypos = y half-pixel of (x,y) coordinates within a VOP; motion
     26            compensated coordinates; native type
     27     comp = pointer to 8-bit compensated prediction values within a VOP;
     28         computed by this module (i/o); full-pel resolution
     29     c_prev = pointer to previous 8-bit prediction values within a VOP;
     30           values range from (0-255); full-pel resolution
     31     sh_d = pointer to residual values used to compensate the predicted
     32         value; values range from (-512 to 511); full-pel resolution
     33     width = width of the VOP in pixels (x axis); full-pel resolution
     34     rnd1 = rounding value for case when one dimension uses half-pel
     35            resolution
     36     rnd2 = rounding value for case when two dimensions use half-pel
     37            resolution
     38     CBP = flag indicating whether residual is all zeros
     39           (0 -> all zeros, 1 -> not all zeros)
     40         outside_flag = flag indicating whether motion vector is outside the
     41                VOP (0 -> inside, 1 -> outside)
     42 
     43  Outputs:
     44     returns 1
     45 
     46  Local Stores/Buffers/Pointers Needed:
     47     None
     48 
     49  Global Stores/Buffers/Pointers Needed:
     50     None
     51 
     52  Pointers and Buffers Modified:
     53     comp = buffer contains newly computed compensated prediction values
     54 
     55  Local Stores Modified:
     56     None
     57 
     58  Global Stores Modified:
     59     None
     60 
     61 ------------------------------------------------------------------------------
     62  FUNCTION DESCRIPTION
     63 
     64  Compute pixel values for a block in the current VOP. The prediction
     65  values are generated by averaging pixel values in the previous VOP; the
     66  block position in the previous frame is computed from the current block's
     67  motion vector. The final pixel values are then obtained by adding the
     68  prediction values to the block residual values.
     69 
     70 
     71 ------------------------------------------------------------------------------
     72 */
     73 
     74 /*----------------------------------------------------------------------------
     75 ; INCLUDES
     76 ----------------------------------------------------------------------------*/
#include <string.h>

#include "mp4dec_lib.h"
#include "motion_comp.h"
     79 
     80 #define OSCL_DISABLE_WARNING_CONV_POSSIBLE_LOSS_OF_DATA
     81 
     82 int GetPredAdvancedBy0x0(
     83     uint8 *prev,        /* i */
     84     uint8 *pred_block,      /* i */
     85     int width,      /* i */
     86     int pred_width_rnd /* i */
     87 )
     88 {
     89     uint    i;      /* loop variable */
     90     int offset, offset2;
     91     uint32  pred_word, word1, word2;
     92     int tmp;
     93 
     94     /* initialize offset to adjust pixel counter */
     95     /*    the next row; full-pel resolution      */
     96     offset = width - B_SIZE; /* offset for prev */
     97     offset2 = (pred_width_rnd >> 1) - 4; /* offset for pred_block */
     98 
     99     tmp = (uint32)prev & 0x3;
    100     pred_block -= offset2; /* preset */
    101 
    102     if (tmp == 0)  /* word-aligned */
    103     {
    104         for (i = B_SIZE; i > 0; i--)
    105         {
    106             *((uint32*)(pred_block += offset2)) = *((uint32*)prev);
    107             *((uint32*)(pred_block += 4)) = *((uint32*)(prev + 4));
    108             prev += width;
    109         }
    110         return 1;
    111     }
    112     else if (tmp == 1) /* first position */
    113     {
    114         prev--; /* word-aligned */
    115 
    116         for (i = B_SIZE; i > 0; i--)
    117         {
    118             word1 = *((uint32*)prev); /* read 4 bytes, b4 b3 b2 b1 */
    119             word2 = *((uint32*)(prev += 4));  /* read 4 bytes, b8 b7 b6 b5 */
    120             word1 >>= 8; /* 0 b4 b3 b2 */
    121             pred_word = word1 | (word2 << 24);  /* b5 b4 b3 b2 */
    122             *((uint32*)(pred_block += offset2)) = pred_word;
    123 
    124             word1 = *((uint32*)(prev += 4)); /* b12 b11 b10 b9 */
    125             word2 >>= 8; /* 0 b8 b7 b6 */
    126             pred_word = word2 | (word1 << 24); /* b9 b8 b7 b6 */
    127             *((uint32*)(pred_block += 4)) = pred_word;
    128 
    129             prev += offset;
    130         }
    131 
    132         return 1;
    133     }
    134     else if (tmp == 2) /* second position */
    135     {
    136         prev -= 2; /* word1-aligned */
    137 
    138         for (i = B_SIZE; i > 0; i--)
    139         {
    140             word1 = *((uint32*)prev); /* read 4 bytes, b4 b3 b2 b1 */
    141             word2 = *((uint32*)(prev += 4));  /* read 4 bytes, b8 b7 b6 b5 */
    142             word1 >>= 16; /* 0 0 b4 b3 */
    143             pred_word = word1 | (word2 << 16);  /* b6 b5 b4 b3 */
    144             *((uint32*)(pred_block += offset2)) = pred_word;
    145 
    146             word1 = *((uint32*)(prev += 4)); /* b12 b11 b10 b9 */
    147             word2 >>= 16; /* 0 0 b8 b7 */
    148             pred_word = word2 | (word1 << 16); /* b10 b9 b8 b7 */
    149             *((uint32*)(pred_block += 4)) = pred_word;
    150 
    151 
    152             prev += offset;
    153         }
    154 
    155         return 1;
    156     }
    157     else /* third position */
    158     {
    159         prev -= 3; /* word1-aligned */
    160 
    161         for (i = B_SIZE; i > 0; i--)
    162         {
    163             word1 = *((uint32*)prev); /* read 4 bytes, b4 b3 b2 b1 */
    164             word2 = *((uint32*)(prev += 4));  /* read 4 bytes, b8 b7 b6 b5 */
    165             word1 >>= 24; /* 0 0 0 b4 */
    166             pred_word = word1 | (word2 << 8);   /* b7 b6 b5 b4 */
    167             *((uint32*)(pred_block += offset2)) = pred_word;
    168 
    169             word1 = *((uint32*)(prev += 4)); /* b12 b11 b10 b9 */
    170             word2 >>= 24; /* 0 0 0 b8 */
    171             pred_word = word2 | (word1 << 8); /* b11 b10 b9 b8 */
    172             *((uint32*)(pred_block += 4)) = pred_word;
    173 
    174             prev += offset;
    175         }
    176 
    177         return 1;
    178     }
    179 }
    180 
    181 /**************************************************************************/
    182 int GetPredAdvancedBy0x1(
    183     uint8 *prev,        /* i */
    184     uint8 *pred_block,      /* i */
    185     int width,      /* i */
    186     int pred_width_rnd /* i */
    187 )
    188 {
    189     uint    i;      /* loop variable */
    190     int offset, offset2;
    191     uint32 word1, word2, word3, word12;
    192     int tmp;
    193     int rnd1;
    194     uint32 mask;
    195 
    196     /* initialize offset to adjust pixel counter */
    197     /*    the next row; full-pel resolution      */
    198     offset = width - B_SIZE; /* offset for prev */
    199     offset2 = (pred_width_rnd >> 1) - 4; /* offset of pred_block */
    200 
    201     rnd1 = pred_width_rnd & 1;
    202 
    203     /* Branch based on pixel location (half-pel or full-pel) for x and y */
    204     pred_block -= offset2; /* preset */
    205 
    206     tmp = (uint32)prev & 3;
    207     mask = 254;
    208     mask |= (mask << 8);
    209     mask |= (mask << 16); /* 0xFEFEFEFE */
    210 
    211     if (tmp == 0) /* word-aligned */
    212     {
    213         if (rnd1 == 1)
    214         {
    215             for (i = B_SIZE; i > 0; i--)
    216             {
    217                 word1 = *((uint32*)prev); /* b4 b3 b2 b1 */
    218                 word2 = *((uint32*)(prev += 4)); /* b8 b7 b6 b5 */
    219                 word12 = (word1 >> 8); /* 0 b4 b3 b2 */
    220                 word12 |= (word2 << 24); /* b5 b4 b3 b2 */
    221                 word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
    222                 word1 &= mask;
    223                 word3 &= (~mask); /* 0x1010101, check last bit */
    224                 word12 &= mask;
    225                 word1 >>= 1;
    226                 word1 = word1 + (word12 >> 1);
    227                 word1 += word3;
    228                 *((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
    229 
    230                 word1 = *((uint32*)(prev += 4)); /* b12 b11 b10 b9 */
    231                 word12 = (word2 >> 8); /* 0 b8 b7 b6 */
    232                 word12 |= (word1 << 24); /* b9 b8 b7 b6 */
    233                 word3 = word2 | word12;
    234                 word2 &= mask;
    235                 word3 &= (~mask);  /* 0x1010101, check last bit */
    236                 word12 &= mask;
    237                 word2 >>= 1;
    238                 word2 = word2 + (word12 >> 1);
    239                 word2 += word3;
    240                 *((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
    241 
    242                 prev += offset;
    243             }
    244             return 1;
    245         }
    246         else /* rnd1 == 0 */
    247         {
    248             for (i = B_SIZE; i > 0; i--)
    249             {
    250                 word1 = *((uint32*)prev); /* b4 b3 b2 b1 */
    251 
    252                 word2 = *((uint32*)(prev += 4)); /* b8 b7 b6 b5 */
    253                 word12 = (word1 >> 8); /* 0 b4 b3 b2 */
    254                 word12 |= (word2 << 24); /* b5 b4 b3 b2 */
    255                 word3 = word1 & word12; // rnd1 = 1; otherwise word3 = word1&word12
    256                 word1 &= mask;
    257                 word3 &= (~mask); /* 0x1010101, check last bit */
    258                 word12 &= mask;
    259                 word1 >>= 1;
    260                 word1 = word1 + (word12 >> 1);
    261                 word1 += word3;
    262                 *((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
    263 
    264                 word1 = *((uint32*)(prev += 4)); /* b12 b11 b10 b9 */
    265                 word12 = (word2 >> 8); /* 0 b8 b7 b6 */
    266                 word12 |= (word1 << 24); /* b9 b8 b7 b6 */
    267                 word3 = word2 & word12;
    268                 word2 &= mask;
    269                 word3 &= (~mask);  /* 0x1010101, check last bit */
    270                 word12 &= mask;
    271                 word2 >>= 1;
    272                 word2 = word2 + (word12 >> 1);
    273                 word2 += word3;
    274                 *((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
    275 
    276                 prev += offset;
    277             }
    278             return 1;
    279         } /* rnd1 */
    280     }
    281     else if (tmp == 1)
    282     {
    283         prev--; /* word-aligned */
    284         if (rnd1 == 1)
    285         {
    286             for (i = B_SIZE; i > 0; i--)
    287             {
    288                 word1 = *((uint32*)prev); /* b3 b2 b1 b0 */
    289                 word2 = *((uint32*)(prev += 4)); /* b7 b6 b5 b4 */
    290                 word12 = (word1 >> 8); /* 0 b3 b2 b1 */
    291                 word1 >>= 16; /* 0 0 b3 b2 */
    292                 word12 |= (word2 << 24); /* b4 b3 b2 b1 */
    293                 word1 |= (word2 << 16); /* b5 b4 b3 b2 */
    294                 word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
    295                 word1 &= mask;
    296                 word3 &= (~mask); /* 0x1010101, check last bit */
    297                 word12 &= mask;
    298                 word1 >>= 1;
    299                 word1 = word1 + (word12 >> 1);
    300                 word1 += word3;
    301                 *((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
    302 
    303                 word1 = *((uint32*)(prev += 4)); /* b11 b10 b9 b8 */
    304                 word12 = (word2 >> 8); /* 0 b7 b6 b5 */
    305                 word2 >>= 16; /* 0 0 b7 b6 */
    306                 word12 |= (word1 << 24); /* b8 b7 b6 b5 */
    307                 word2 |= (word1 << 16); /* b9 b8 b7 b6 */
    308                 word3 = word2 | word12; // rnd1 = 1; otherwise word3 = word2&word12
    309                 word2 &= mask;
    310                 word3 &= (~mask); /* 0x1010101, check last bit */
    311                 word12 &= mask;
    312                 word2 >>= 1;
    313                 word2 = word2 + (word12 >> 1);
    314                 word2 += word3;
    315                 *((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
    316 
    317                 prev += offset;
    318             }
    319             return 1;
    320         }
    321         else /* rnd1 = 0 */
    322         {
    323             for (i = B_SIZE; i > 0; i--)
    324             {
    325                 word1 = *((uint32*)prev); /* b3 b2 b1 b0 */
    326 
    327                 word2 = *((uint32*)(prev += 4)); /* b7 b6 b5 b4 */
    328                 word12 = (word1 >> 8); /* 0 b3 b2 b1 */
    329                 word1 >>= 16; /* 0 0 b3 b2 */
    330                 word12 |= (word2 << 24); /* b4 b3 b2 b1 */
    331                 word1 |= (word2 << 16); /* b5 b4 b3 b2 */
    332                 word3 = word1 & word12;
    333                 word1 &= mask;
    334                 word3 &= (~mask); /* 0x1010101, check last bit */
    335                 word12 &= mask;
    336                 word1 >>= 1;
    337                 word1 = word1 + (word12 >> 1);
    338                 word1 += word3;
    339                 *((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
    340 
    341                 word1 = *((uint32*)(prev += 4)); /* b11 b10 b9 b8 */
    342                 word12 = (word2 >> 8); /* 0 b7 b6 b5 */
    343                 word2 >>= 16; /* 0 0 b7 b6 */
    344                 word12 |= (word1 << 24); /* b8 b7 b6 b5 */
    345                 word2 |= (word1 << 16); /* b9 b8 b7 b6 */
    346                 word3 = word2 & word12;
    347                 word2 &= mask;
    348                 word3 &= (~mask); /* 0x1010101, check last bit */
    349                 word12 &= mask;
    350                 word2 >>= 1;
    351                 word2 = word2 + (word12 >> 1);
    352                 word2 += word3;
    353                 *((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
    354 
    355                 prev += offset;
    356             }
    357             return 1;
    358         } /* rnd1 */
    359     }
    360     else if (tmp == 2)
    361     {
    362         prev -= 2; /* word-aligned */
    363         if (rnd1 == 1)
    364         {
    365             for (i = B_SIZE; i > 0; i--)
    366             {
    367                 word1 = *((uint32*)prev); /* b2 b1 b0 bN1 */
    368                 word2 = *((uint32*)(prev += 4)); /* b6 b5 b4 b3 */
    369                 word12 = (word1 >> 16); /* 0 0 b2 b1 */
    370                 word1 >>= 24; /* 0 0 0 b2 */
    371                 word12 |= (word2 << 16); /* b4 b3 b2 b1 */
    372                 word1 |= (word2 << 8); /* b5 b4 b3 b2 */
    373                 word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
    374                 word1 &= mask;
    375                 word3 &= (~mask); /* 0x1010101, check last bit */
    376                 word12 &= mask;
    377                 word1 >>= 1;
    378                 word1 = word1 + (word12 >> 1);
    379                 word1 += word3;
    380                 *((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
    381 
    382                 word1 = *((uint32*)(prev += 4)); /* b10 b9 b8 b7 */
    383                 word12 = (word2 >> 16); /* 0 0 b6 b5 */
    384                 word2 >>= 24; /* 0 0 0 b6 */
    385                 word12 |= (word1 << 16); /* b8 b7 b6 b5 */
    386                 word2 |= (word1 << 8); /* b9 b8 b7 b6 */
    387                 word3 = word2 | word12; // rnd1 = 1; otherwise word3 = word1&word12
    388                 word2 &= mask;
    389                 word3 &= (~mask); /* 0x1010101, check last bit */
    390                 word12 &= mask;
    391                 word2 >>= 1;
    392                 word2 = word2 + (word12 >> 1);
    393                 word2 += word3;
    394                 *((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
    395                 prev += offset;
    396             }
    397             return 1;
    398         }
    399         else /* rnd1 == 0 */
    400         {
    401             for (i = B_SIZE; i > 0; i--)
    402             {
    403                 word1 = *((uint32*)prev); /* b2 b1 b0 bN1 */
    404                 word2 = *((uint32*)(prev += 4)); /* b6 b5 b4 b3 */
    405                 word12 = (word1 >> 16); /* 0 0 b2 b1 */
    406                 word1 >>= 24; /* 0 0 0 b2 */
    407                 word12 |= (word2 << 16); /* b4 b3 b2 b1 */
    408                 word1 |= (word2 << 8); /* b5 b4 b3 b2 */
    409                 word3 = word1 & word12; // rnd1 = 1; otherwise word3 = word1&word12
    410                 word1 &= mask;
    411                 word3 &= (~mask); /* 0x1010101, check last bit */
    412                 word12 &= mask;
    413                 word1 >>= 1;
    414                 word1 = word1 + (word12 >> 1);
    415                 word1 += word3;
    416                 *((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
    417 
    418                 word1 = *((uint32*)(prev += 4)); /* b10 b9 b8 b7 */
    419                 word12 = (word2 >> 16); /* 0 0 b6 b5 */
    420                 word2 >>= 24; /* 0 0 0 b6 */
    421                 word12 |= (word1 << 16); /* b8 b7 b6 b5 */
    422                 word2 |= (word1 << 8); /* b9 b8 b7 b6 */
    423                 word3 = word2 & word12; // rnd1 = 1; otherwise word3 = word1&word12
    424                 word2 &= mask;
    425                 word3 &= (~mask); /* 0x1010101, check last bit */
    426                 word12 &= mask;
    427                 word2 >>= 1;
    428                 word2 = word2 + (word12 >> 1);
    429                 word2 += word3;
    430                 *((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
    431                 prev += offset;
    432             }
    433             return 1;
    434         }
    435     }
    436     else /* tmp = 3 */
    437     {
    438         prev -= 3; /* word-aligned */
    439         if (rnd1 == 1)
    440         {
    441             for (i = B_SIZE; i > 0; i--)
    442             {
    443                 word1 = *((uint32*)prev); /* b1 b0 bN1 bN2 */
    444                 word2 = *((uint32*)(prev += 4)); /* b5 b4 b3 b2 */
    445                 word12 = (word1 >> 24); /* 0 0 0 b1 */
    446                 word12 |= (word2 << 8); /* b4 b3 b2 b1 */
    447                 word1 = word2;
    448                 word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
    449                 word1 &= mask;
    450                 word3 &= (~mask); /* 0x1010101, check last bit */
    451                 word12 &= mask;
    452                 word1 >>= 1;
    453                 word1 = word1 + (word12 >> 1);
    454                 word1 += word3;
    455                 *((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
    456 
    457                 word1 = *((uint32*)(prev += 4)); /* b9 b8 b7 b6 */
    458                 word12 = (word2 >> 24); /* 0 0 0 b5 */
    459                 word12 |= (word1 << 8); /* b8 b7 b6 b5 */
    460                 word2 = word1; /* b9 b8 b7 b6 */
    461                 word3 = word2 | word12; // rnd1 = 1; otherwise word3 = word1&word12
    462                 word2 &= mask;
    463                 word3 &= (~mask); /* 0x1010101, check last bit */
    464                 word12 &= mask;
    465                 word2 >>= 1;
    466                 word2 = word2 + (word12 >> 1);
    467                 word2 += word3;
    468                 *((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
    469                 prev += offset;
    470             }
    471             return 1;
    472         }
    473         else
    474         {
    475             for (i = B_SIZE; i > 0; i--)
    476             {
    477                 word1 = *((uint32*)prev); /* b1 b0 bN1 bN2 */
    478                 word2 = *((uint32*)(prev += 4)); /* b5 b4 b3 b2 */
    479                 word12 = (word1 >> 24); /* 0 0 0 b1 */
    480                 word12 |= (word2 << 8); /* b4 b3 b2 b1 */
    481                 word1 = word2;
    482                 word3 = word1 & word12; // rnd1 = 1; otherwise word3 = word1&word12
    483                 word1 &= mask;
    484                 word3 &= (~mask); /* 0x1010101, check last bit */
    485                 word12 &= mask;
    486                 word1 >>= 1;
    487                 word1 = word1 + (word12 >> 1);
    488                 word1 += word3;
    489                 *((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
    490 
    491                 word1 = *((uint32*)(prev += 4)); /* b9 b8 b7 b6 */
    492                 word12 = (word2 >> 24); /* 0 0 0 b5 */
    493                 word12 |= (word1 << 8); /* b8 b7 b6 b5 */
    494                 word2 = word1; /* b9 b8 b7 b6 */
    495                 word3 = word2 & word12; // rnd1 = 1; otherwise word3 = word1&word12
    496                 word2 &= mask;
    497                 word3 &= (~mask); /* 0x1010101, check last bit */
    498                 word12 &= mask;
    499                 word2 >>= 1;
    500                 word2 = word2 + (word12 >> 1);
    501                 word2 += word3;
    502                 *((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
    503                 prev += offset;
    504             }
    505             return 1;
    506         }
    507     }
    508 }
    509 
    510 /**************************************************************************/
    511 int GetPredAdvancedBy1x0(
    512     uint8 *prev,        /* i */
    513     uint8 *pred_block,      /* i */
    514     int width,      /* i */
    515     int pred_width_rnd /* i */
    516 )
    517 {
    518     uint    i;      /* loop variable */
    519     int offset, offset2;
    520     uint32  word1, word2, word3, word12, word22;
    521     int tmp;
    522     int rnd1;
    523     uint32 mask;
    524 
    525     /* initialize offset to adjust pixel counter */
    526     /*    the next row; full-pel resolution      */
    527     offset = width - B_SIZE; /* offset for prev */
    528     offset2 = (pred_width_rnd >> 1) - 4; /* offset for pred_block */
    529 
    530     rnd1 = pred_width_rnd & 1;
    531 
    532     /* Branch based on pixel location (half-pel or full-pel) for x and y */
    533     pred_block -= offset2; /* preset */
    534 
    535     tmp = (uint32)prev & 3;
    536     mask = 254;
    537     mask |= (mask << 8);
    538     mask |= (mask << 16); /* 0xFEFEFEFE */
    539 
    540     if (tmp == 0) /* word-aligned */
    541     {
    542         prev -= 4;
    543         if (rnd1 == 1)
    544         {
    545             for (i = B_SIZE; i > 0; i--)
    546             {
    547                 word1 = *((uint32*)(prev += 4));
    548                 word2 = *((uint32*)(prev + width));
    549                 word3 = word1 | word2; // rnd1 = 1; otherwise word3 = word1&word2
    550                 word1 &= mask;
    551                 word3 &= (~mask); /* 0x1010101, check last bit */
    552                 word2 &= mask;
    553                 word1 >>= 1;
    554                 word1 = word1 + (word2 >> 1);
    555                 word1 += word3;
    556                 *((uint32*)(pred_block += offset2)) = word1;
    557                 word1 = *((uint32*)(prev += 4));
    558                 word2 = *((uint32*)(prev + width));
    559                 word3 = word1 | word2; // rnd1 = 1; otherwise word3 = word1&word2
    560                 word1 &= mask;
    561                 word3 &= (~mask); /* 0x1010101, check last bit */
    562                 word2 &= mask;
    563                 word1 >>= 1;
    564                 word1 = word1 + (word2 >> 1);
    565                 word1 += word3;
    566                 *((uint32*)(pred_block += 4)) = word1;
    567 
    568                 prev += offset;
    569             }
    570             return 1;
    571         }
    572         else   /* rnd1 = 0 */
    573         {
    574             for (i = B_SIZE; i > 0; i--)
    575             {
    576                 word1 = *((uint32*)(prev += 4));
    577                 word2 = *((uint32*)(prev + width));
    578                 word3 = word1 & word2;  /* rnd1 = 0; */
    579                 word1 &= mask;
    580                 word3 &= (~mask); /* 0x1010101, check last bit */
    581                 word2 &= mask;
    582                 word1 >>= 1;
    583                 word1 = word1 + (word2 >> 1);
    584                 word1 += word3;
    585                 *((uint32*)(pred_block += offset2)) = word1;
    586                 word1 = *((uint32*)(prev += 4));
    587                 word2 = *((uint32*)(prev + width));
    588                 word3 = word1 & word2;  /* rnd1 = 0; */
    589                 word1 &= mask;
    590                 word3 &= (~mask); /* 0x1010101, check last bit */
    591                 word2 &= mask;
    592                 word1 >>= 1;
    593                 word1 = word1 + (word2 >> 1);
    594                 word1 += word3;
    595                 *((uint32*)(pred_block += 4)) = word1;
    596 
    597                 prev += offset;
    598             }
    599             return 1;
    600         }
    601     }
    602     else if (tmp == 1)
    603     {
    604         prev--; /* word-aligned */
    605         if (rnd1 == 1)
    606         {
    607             for (i = B_SIZE; i > 0; i--)
    608             {
    609                 word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
    610                 word22 = *((uint32*)(prev + width));
    611 
    612                 word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
    613                 word2 = *((uint32*)(prev + width));
    614                 word12 >>= 8; /* 0 b4 b3 b2 */
    615                 word22 >>= 8;
    616                 word12 = word12 | (word1 << 24); /* b5 b4 b3 b2 */
    617                 word22 = word22 | (word2 << 24);
    618                 word3 = word12 | word22;
    619                 word12 &= mask;
    620                 word22 &= mask;
    621                 word3 &= (~mask); /* 0x1010101, check last bit */
    622                 word12 >>= 1;
    623                 word12 = word12 + (word22 >> 1);
    624                 word12 += word3;
    625                 *((uint32*)(pred_block += offset2)) = word12;
    626 
    627                 word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
    628                 word22 = *((uint32*)(prev + width));
    629                 word1 >>= 8; /* 0 b8 b7 b6 */
    630                 word2 >>= 8;
    631                 word1 = word1 | (word12 << 24); /* b9 b8 b7 b6 */
    632                 word2 = word2 | (word22 << 24);
    633                 word3 = word1 | word2;
    634                 word1 &= mask;
    635                 word2 &= mask;
    636                 word3 &= (~mask); /* 0x1010101, check last bit */
    637                 word1 >>= 1;
    638                 word1 = word1 + (word2 >> 1);
    639                 word1 += word3;
    640                 *((uint32*)(pred_block += 4)) = word1;
    641                 prev += offset;
    642             }
    643             return 1;
    644         }
    645         else /* rnd1 = 0 */
    646         {
    647             for (i = B_SIZE; i > 0; i--)
    648             {
    649                 word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
    650                 word22 = *((uint32*)(prev + width));
    651 
    652                 word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
    653                 word2 = *((uint32*)(prev + width));
    654                 word12 >>= 8; /* 0 b4 b3 b2 */
    655                 word22 >>= 8;
    656                 word12 = word12 | (word1 << 24); /* b5 b4 b3 b2 */
    657                 word22 = word22 | (word2 << 24);
    658                 word3 = word12 & word22;
    659                 word12 &= mask;
    660                 word22 &= mask;
    661                 word3 &= (~mask); /* 0x1010101, check last bit */
    662                 word12 >>= 1;
    663                 word12 = word12 + (word22 >> 1);
    664                 word12 += word3;
    665                 *((uint32*)(pred_block += offset2)) = word12;
    666 
    667                 word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
    668                 word22 = *((uint32*)(prev + width));
    669                 word1 >>= 8; /* 0 b8 b7 b6 */
    670                 word2 >>= 8;
    671                 word1 = word1 | (word12 << 24); /* b9 b8 b7 b6 */
    672                 word2 = word2 | (word22 << 24);
    673                 word3 = word1 & word2;
    674                 word1 &= mask;
    675                 word2 &= mask;
    676                 word3 &= (~mask); /* 0x1010101, check last bit */
    677                 word1 >>= 1;
    678                 word1 = word1 + (word2 >> 1);
    679                 word1 += word3;
    680                 *((uint32*)(pred_block += 4)) = word1;
    681                 prev += offset;
    682             }
    683             return 1;
    684         }
    685     }
    686     else if (tmp == 2)
    687     {
    688         prev -= 2; /* word-aligned */
    689         if (rnd1 == 1)
    690         {
    691             for (i = B_SIZE; i > 0; i--)
    692             {
    693                 word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
    694                 word22 = *((uint32*)(prev + width));
    695 
    696                 word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
    697                 word2 = *((uint32*)(prev + width));
    698                 word12 >>= 16; /* 0 0 b4 b3 */
    699                 word22 >>= 16;
    700                 word12 = word12 | (word1 << 16); /* b6 b5 b4 b3 */
    701                 word22 = word22 | (word2 << 16);
    702                 word3 = word12 | word22;
    703                 word12 &= mask;
    704                 word22 &= mask;
    705                 word3 &= (~mask); /* 0x1010101, check last bit */
    706                 word12 >>= 1;
    707                 word12 = word12 + (word22 >> 1);
    708                 word12 += word3;
    709                 *((uint32*)(pred_block += offset2)) = word12;
    710 
    711                 word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
    712                 word22 = *((uint32*)(prev + width));
    713                 word1 >>= 16; /* 0 0 b8 b7 */
    714                 word2 >>= 16;
    715                 word1 = word1 | (word12 << 16); /* b10 b9 b8 b7 */
    716                 word2 = word2 | (word22 << 16);
    717                 word3 = word1 | word2;
    718                 word1 &= mask;
    719                 word2 &= mask;
    720                 word3 &= (~mask); /* 0x1010101, check last bit */
    721                 word1 >>= 1;
    722                 word1 = word1 + (word2 >> 1);
    723                 word1 += word3;
    724                 *((uint32*)(pred_block += 4)) = word1;
    725                 prev += offset;
    726             }
    727             return 1;
    728         }
    729         else /* rnd1 = 0 */
    730         {
    731             for (i = B_SIZE; i > 0; i--)
    732             {
    733                 word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
    734                 word22 = *((uint32*)(prev + width));
    735 
    736                 word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
    737                 word2 = *((uint32*)(prev + width));
    738                 word12 >>= 16; /* 0 0 b4 b3 */
    739                 word22 >>= 16;
    740                 word12 = word12 | (word1 << 16); /* b6 b5 b4 b3 */
    741                 word22 = word22 | (word2 << 16);
    742                 word3 = word12 & word22;
    743                 word12 &= mask;
    744                 word22 &= mask;
    745                 word3 &= (~mask); /* 0x1010101, check last bit */
    746                 word12 >>= 1;
    747                 word12 = word12 + (word22 >> 1);
    748                 word12 += word3;
    749                 *((uint32*)(pred_block += offset2)) = word12;
    750 
    751                 word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
    752                 word22 = *((uint32*)(prev + width));
    753                 word1 >>= 16; /* 0 0 b8 b7 */
    754                 word2 >>= 16;
    755                 word1 = word1 | (word12 << 16); /* b10 b9 b8 b7 */
    756                 word2 = word2 | (word22 << 16);
    757                 word3 = word1 & word2;
    758                 word1 &= mask;
    759                 word2 &= mask;
    760                 word3 &= (~mask); /* 0x1010101, check last bit */
    761                 word1 >>= 1;
    762                 word1 = word1 + (word2 >> 1);
    763                 word1 += word3;
    764                 *((uint32*)(pred_block += 4)) = word1;
    765                 prev += offset;
    766             }
    767 
    768             return 1;
    769         }
    770     }
    771     else /* tmp == 3 */
    772     {
    773         prev -= 3; /* word-aligned */
    774         if (rnd1 == 1)
    775         {
    776             for (i = B_SIZE; i > 0; i--)
    777             {
    778                 word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
    779                 word22 = *((uint32*)(prev + width));
    780 
    781                 word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
    782                 word2 = *((uint32*)(prev + width));
    783                 word12 >>= 24; /* 0 0 0 b4 */
    784                 word22 >>= 24;
    785                 word12 = word12 | (word1 << 8); /* b7 b6 b5 b4 */
    786                 word22 = word22 | (word2 << 8);
    787                 word3 = word12 | word22;
    788                 word12 &= mask;
    789                 word22 &= mask;
    790                 word3 &= (~mask); /* 0x1010101, check last bit */
    791                 word12 >>= 1;
    792                 word12 = word12 + (word22 >> 1);
    793                 word12 += word3;
    794                 *((uint32*)(pred_block += offset2)) = word12;
    795 
    796                 word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
    797                 word22 = *((uint32*)(prev + width));
    798                 word1 >>= 24; /* 0 0 0 b8 */
    799                 word2 >>= 24;
    800                 word1 = word1 | (word12 << 8); /* b11 b10 b9 b8 */
    801                 word2 = word2 | (word22 << 8);
    802                 word3 = word1 | word2;
    803                 word1 &= mask;
    804                 word2 &= mask;
    805                 word3 &= (~mask); /* 0x1010101, check last bit */
    806                 word1 >>= 1;
    807                 word1 = word1 + (word2 >> 1);
    808                 word1 += word3;
    809                 *((uint32*)(pred_block += 4)) = word1;
    810                 prev += offset;
    811             }
    812             return 1;
    813         }
    814         else /* rnd1 = 0 */
    815         {
    816             for (i = B_SIZE; i > 0; i--)
    817             {
    818                 word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
    819                 word22 = *((uint32*)(prev + width));
    820 
    821                 word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
    822                 word2 = *((uint32*)(prev + width));
    823                 word12 >>= 24; /* 0 0 0 b4 */
    824                 word22 >>= 24;
    825                 word12 = word12 | (word1 << 8); /* b7 b6 b5 b4 */
    826                 word22 = word22 | (word2 << 8);
    827                 word3 = word12 & word22;
    828                 word12 &= mask;
    829                 word22 &= mask;
    830                 word3 &= (~mask); /* 0x1010101, check last bit */
    831                 word12 >>= 1;
    832                 word12 = word12 + (word22 >> 1);
    833                 word12 += word3;
    834                 *((uint32*)(pred_block += offset2)) = word12;
    835 
    836                 word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
    837                 word22 = *((uint32*)(prev + width));
    838                 word1 >>= 24; /* 0 0 0 b8 */
    839                 word2 >>= 24;
    840                 word1 = word1 | (word12 << 8); /* b11 b10 b9 b8 */
    841                 word2 = word2 | (word22 << 8);
    842                 word3 = word1 & word2;
    843                 word1 &= mask;
    844                 word2 &= mask;
    845                 word3 &= (~mask); /* 0x1010101, check last bit */
    846                 word1 >>= 1;
    847                 word1 = word1 + (word2 >> 1);
    848                 word1 += word3;
    849                 *((uint32*)(pred_block += 4)) = word1;
    850                 prev += offset;
    851             }
    852             return 1;
    853         } /* rnd */
    854     } /* tmp */
    855 }
    856 
    857 /**********************************************************************************/
    858 int GetPredAdvancedBy1x1(
    859     uint8 *prev,        /* i */
    860     uint8 *pred_block,      /* i */
    861     int width,      /* i */
    862     int pred_width_rnd /* i */
    863 )
    864 {
    865     uint    i;      /* loop variable */
    866     int offset, offset2;
    867     uint32  x1, x2, x1m, x2m, y1, y2, y1m, y2m; /* new way */
    868     int tmp;
    869     int rnd1, rnd2;
    870     uint32 mask;
    871 
    872     /* initialize offset to adjust pixel counter */
    873     /*    the next row; full-pel resolution      */
    874     offset = width - B_SIZE; /* offset for prev */
    875     offset2 = (pred_width_rnd >> 1) - 8; /* offset for pred_block */
    876 
    877     rnd1 = pred_width_rnd & 1;
    878 
    879     rnd2 = rnd1 + 1;
    880     rnd2 |= (rnd2 << 8);
    881     rnd2 |= (rnd2 << 16);
    882 
    883     mask = 0x3F;
    884     mask |= (mask << 8);
    885     mask |= (mask << 16); /* 0x3f3f3f3f */
    886 
    887     tmp = (uint32)prev & 3;
    888 
    889     pred_block -= 4; /* preset */
    890 
    891     if (tmp == 0) /* word-aligned */
    892     {
    893         for (i = B_SIZE; i > 0; i--)
    894         {
    895             x1 = *((uint32*)prev); /* load a3 a2 a1 a0 */
    896             x2 = *((uint32*)(prev + width)); /* load b3 b2 b1 b0, another line */
    897             y1 = *((uint32*)(prev += 4)); /* a7 a6 a5 a4 */
    898             y2 = *((uint32*)(prev + width)); /* b7 b6 b5 b4 */
    899 
    900             x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
    901             x2m = (x2 >> 2) & mask;
    902             x1 = x1 ^(x1m << 2);
    903             x2 = x2 ^(x2m << 2);
    904             x1m += x2m;
    905             x1 += x2;
    906 
    907             /* x2m, x2 free */
    908             y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
    909             y2m = (y2 >> 2) & mask;
    910             y1 = y1 ^(y1m << 2);
    911             y2 = y2 ^(y2m << 2);
    912             y1m += y2m;
    913             y1 += y2;
    914 
    915             /* y2m, y2 free */
    916             /* x2m, x2 free */
    917             x2 = *((uint32*)(prev += 4)); /* a11 a10 a9 a8 */
    918             y2 = *((uint32*)(prev + width)); /* b11 b10 b9 b8 */
    919             x2m = (x2 >> 2) & mask;
    920             y2m = (y2 >> 2) & mask;
    921             x2 = x2 ^(x2m << 2);
    922             y2 = y2 ^(y2m << 2);
    923             x2m += y2m;
    924             x2 += y2;
    925             /* y2m, y2 free */
    926 
    927             /* now operate on x1m, x1, y1m, y1, x2m, x2 */
    928             /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
    929             /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
    930             /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
    931             /* x1, y1, x2 */
    932 
    933             y2m = x1m >> 8;
    934             y2 = x1 >> 8;
    935             y2m |= (y1m << 24);  /* a4+b4, a3+b3, a2+b2, a1+b1 */
    936             y2 |= (y1 << 24);
    937             x1m += y2m;  /* a3+b3+a4+b4, ....., a0+b0+a1+b1 */
    938             x1 += y2;
    939             x1 += rnd2;
    940             x1 &= (mask << 2);
    941             x1m += (x1 >> 2);
    942             *((uint32*)(pred_block += 4)) = x1m; /* save x1m */
    943 
    944             y2m = y1m >> 8;
    945             y2 = y1 >> 8;
    946             y2m |= (x2m << 24); /* a8+b8, a7+b7, a6+b6, a5+b5 */
    947             y2 |= (x2 << 24);
    948             y1m += y2m;  /* a7+b7+a8+b8, ....., a4+b4+a5+b5 */
    949             y1 += y2;
    950             y1 += rnd2;
    951             y1 &= (mask << 2);
    952             y1m += (y1 >> 2);
    953             *((uint32*)(pred_block += 4)) = y1m; /* save y1m */
    954 
    955             pred_block += offset2;
    956             prev += offset;
    957         }
    958 
    959         return 1;
    960     }
    961     else if (tmp == 1)
    962     {
    963         prev--; /* to word-aligned */
    964         for (i = B_SIZE; i > 0; i--)
    965         {
    966             x1 = *((uint32*)prev); /* load a3 a2 a1 a0 */
    967             x2 = *((uint32*)(prev + width)); /* load b3 b2 b1 b0, another line */
    968             y1 = *((uint32*)(prev += 4)); /* a7 a6 a5 a4 */
    969             y2 = *((uint32*)(prev + width)); /* b7 b6 b5 b4 */
    970 
    971             x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
    972             x2m = (x2 >> 2) & mask;
    973             x1 = x1 ^(x1m << 2);
    974             x2 = x2 ^(x2m << 2);
    975             x1m += x2m;
    976             x1 += x2;
    977 
    978             /* x2m, x2 free */
    979             y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
    980             y2m = (y2 >> 2) & mask;
    981             y1 = y1 ^(y1m << 2);
    982             y2 = y2 ^(y2m << 2);
    983             y1m += y2m;
    984             y1 += y2;
    985 
    986             /* y2m, y2 free */
    987             /* x2m, x2 free */
    988             x2 = *((uint32*)(prev += 4)); /* a11 a10 a9 a8 */
    989             y2 = *((uint32*)(prev + width)); /* b11 b10 b9 b8 */
    990             x2m = (x2 >> 2) & mask;
    991             y2m = (y2 >> 2) & mask;
    992             x2 = x2 ^(x2m << 2);
    993             y2 = y2 ^(y2m << 2);
    994             x2m += y2m;
    995             x2 += y2;
    996             /* y2m, y2 free */
    997 
    998             /* now operate on x1m, x1, y1m, y1, x2m, x2 */
    999             /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
   1000             /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
   1001             /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
   1002             /* x1, y1, x2 */
   1003 
   1004             x1m >>= 8 ;
   1005             x1 >>= 8;
   1006             x1m |= (y1m << 24);  /* a4+b4, a3+b3, a2+b2, a1+b1 */
   1007             x1 |= (y1 << 24);
   1008             y2m = (y1m << 16);
   1009             y2 = (y1 << 16);
   1010             y2m |= (x1m >> 8); /* a5+b5, a4+b4, a3+b3, a2+b2 */
   1011             y2 |= (x1 >> 8);
   1012             x1 += rnd2;
   1013             x1m += y2m;  /* a4+b4+a5+b5, ....., a1+b1+a2+b2 */
   1014             x1 += y2;
   1015             x1 &= (mask << 2);
   1016             x1m += (x1 >> 2);
   1017             *((uint32*)(pred_block += 4)) = x1m; /* save x1m */
   1018 
   1019             y1m >>= 8;
   1020             y1 >>= 8;
   1021             y1m |= (x2m << 24); /* a8+b8, a7+b7, a6+b6, a5+b5 */
   1022             y1 |= (x2 << 24);
   1023             y2m = (x2m << 16);
   1024             y2 = (x2 << 16);
   1025             y2m |= (y1m >> 8); /*  a9+b9, a8+b8, a7+b7, a6+b6,*/
   1026             y2 |= (y1 >> 8);
   1027             y1 += rnd2;
   1028             y1m += y2m;  /* a8+b8+a9+b9, ....., a5+b5+a6+b6 */
   1029             y1 += y2;
   1030             y1 &= (mask << 2);
   1031             y1m += (y1 >> 2);
   1032             *((uint32*)(pred_block += 4)) = y1m; /* save y1m */
   1033 
   1034             pred_block += offset2;
   1035             prev += offset;
   1036         }
   1037         return 1;
   1038     }
   1039     else if (tmp == 2)
   1040     {
   1041         prev -= 2; /* to word-aligned */
   1042         for (i = B_SIZE; i > 0; i--)
   1043         {
   1044             x1 = *((uint32*)prev); /* load a3 a2 a1 a0 */
   1045             x2 = *((uint32*)(prev + width)); /* load b3 b2 b1 b0, another line */
   1046             y1 = *((uint32*)(prev += 4)); /* a7 a6 a5 a4 */
   1047             y2 = *((uint32*)(prev + width)); /* b7 b6 b5 b4 */
   1048 
   1049             x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
   1050             x2m = (x2 >> 2) & mask;
   1051             x1 = x1 ^(x1m << 2);
   1052             x2 = x2 ^(x2m << 2);
   1053             x1m += x2m;
   1054             x1 += x2;
   1055 
   1056             /* x2m, x2 free */
   1057             y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
   1058             y2m = (y2 >> 2) & mask;
   1059             y1 = y1 ^(y1m << 2);
   1060             y2 = y2 ^(y2m << 2);
   1061             y1m += y2m;
   1062             y1 += y2;
   1063 
   1064             /* y2m, y2 free */
   1065             /* x2m, x2 free */
   1066             x2 = *((uint32*)(prev += 4)); /* a11 a10 a9 a8 */
   1067             y2 = *((uint32*)(prev + width)); /* b11 b10 b9 b8 */
   1068             x2m = (x2 >> 2) & mask;
   1069             y2m = (y2 >> 2) & mask;
   1070             x2 = x2 ^(x2m << 2);
   1071             y2 = y2 ^(y2m << 2);
   1072             x2m += y2m;
   1073             x2 += y2;
   1074             /* y2m, y2 free */
   1075 
   1076             /* now operate on x1m, x1, y1m, y1, x2m, x2 */
   1077             /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
   1078             /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
   1079             /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
   1080             /* x1, y1, x2 */
   1081 
   1082             x1m >>= 16 ;
   1083             x1 >>= 16;
   1084             x1m |= (y1m << 16);  /* a5+b5, a4+b4, a3+b3, a2+b2 */
   1085             x1 |= (y1 << 16);
   1086             y2m = (y1m << 8);
   1087             y2 = (y1 << 8);
   1088             y2m |= (x1m >> 8); /* a6+b6, a5+b5, a4+b4, a3+b3 */
   1089             y2 |= (x1 >> 8);
   1090             x1 += rnd2;
   1091             x1m += y2m;  /* a5+b5+a6+b6, ....., a2+b2+a3+b3 */
   1092             x1 += y2;
   1093             x1 &= (mask << 2);
   1094             x1m += (x1 >> 2);
   1095             *((uint32*)(pred_block += 4)) = x1m; /* save x1m */
   1096 
   1097             y1m >>= 16;
   1098             y1 >>= 16;
   1099             y1m |= (x2m << 16); /* a9+b9, a8+b8, a7+b7, a6+b6 */
   1100             y1 |= (x2 << 16);
   1101             y2m = (x2m << 8);
   1102             y2 = (x2 << 8);
   1103             y2m |= (y1m >> 8); /*  a10+b10, a9+b9, a8+b8, a7+b7,*/
   1104             y2 |= (y1 >> 8);
   1105             y1 += rnd2;
   1106             y1m += y2m;  /* a9+b9+a10+b10, ....., a6+b6+a7+b7 */
   1107             y1 += y2;
   1108             y1 &= (mask << 2);
   1109             y1m += (y1 >> 2);
   1110             *((uint32*)(pred_block += 4)) = y1m; /* save y1m */
   1111 
   1112             pred_block += offset2;
   1113             prev += offset;
   1114         }
   1115         return 1;
   1116     }
   1117     else /* tmp == 3 */
   1118     {
   1119         prev -= 3; /* to word-aligned */
   1120         for (i = B_SIZE; i > 0; i--)
   1121         {
   1122             x1 = *((uint32*)prev); /* load a3 a2 a1 a0 */
   1123             x2 = *((uint32*)(prev + width)); /* load b3 b2 b1 b0, another line */
   1124             y1 = *((uint32*)(prev += 4)); /* a7 a6 a5 a4 */
   1125             y2 = *((uint32*)(prev + width)); /* b7 b6 b5 b4 */
   1126 
   1127             x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
   1128             x2m = (x2 >> 2) & mask;
   1129             x1 = x1 ^(x1m << 2);
   1130             x2 = x2 ^(x2m << 2);
   1131             x1m += x2m;
   1132             x1 += x2;
   1133 
   1134             /* x2m, x2 free */
   1135             y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
   1136             y2m = (y2 >> 2) & mask;
   1137             y1 = y1 ^(y1m << 2);
   1138             y2 = y2 ^(y2m << 2);
   1139             y1m += y2m;
   1140             y1 += y2;
   1141 
   1142             /* y2m, y2 free */
   1143             /* x2m, x2 free */
   1144             x2 = *((uint32*)(prev += 4)); /* a11 a10 a9 a8 */
   1145             y2 = *((uint32*)(prev + width)); /* b11 b10 b9 b8 */
   1146             x2m = (x2 >> 2) & mask;
   1147             y2m = (y2 >> 2) & mask;
   1148             x2 = x2 ^(x2m << 2);
   1149             y2 = y2 ^(y2m << 2);
   1150             x2m += y2m;
   1151             x2 += y2;
   1152             /* y2m, y2 free */
   1153 
   1154             /* now operate on x1m, x1, y1m, y1, x2m, x2 */
   1155             /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
   1156             /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
   1157             /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
   1158             /* x1, y1, x2 */
   1159 
   1160             x1m >>= 24 ;
   1161             x1 >>= 24;
   1162             x1m |= (y1m << 8);  /* a6+b6, a5+b5, a4+b4, a3+b3 */
   1163             x1 |= (y1 << 8);
   1164 
   1165             x1m += y1m;  /* a6+b6+a7+b7, ....., a3+b3+a4+b4 */
   1166             x1 += y1;
   1167             x1 += rnd2;
   1168             x1 &= (mask << 2);
   1169             x1m += (x1 >> 2);
   1170             *((uint32*)(pred_block += 4)) = x1m; /* save x1m */
   1171 
   1172             y1m >>= 24;
   1173             y1 >>= 24;
   1174             y1m |= (x2m << 8); /* a10+b10, a9+b9, a8+b8, a7+b7 */
   1175             y1 |= (x2 << 8);
   1176             y1m += x2m;  /* a10+b10+a11+b11, ....., a7+b7+a8+b8 */
   1177             y1 += x2;
   1178             y1 += rnd2;
   1179             y1 &= (mask << 2);
   1180             y1m += (y1 >> 2);
   1181             *((uint32*)(pred_block += 4)) = y1m; /* save y1m */
   1182 
   1183             pred_block += offset2;
   1184             prev += offset;
   1185         }
   1186         return 1;
   1187     }
   1188 }
   1189 
   1190 
   1191