Home | History | Annotate | Download | only in src
      1 /* ------------------------------------------------------------------
      2  * Copyright (C) 1998-2009 PacketVideo
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
     13  * express or implied.
     14  * See the License for the specific language governing permissions
     15  * and limitations under the License.
     16  * -------------------------------------------------------------------
     17  */
#include <string.h>

#include "avclib_common.h"
     19 
     20 /* input are in the first 16 elements of block,
     21    output must be in the location specified in Figure 8-6. */
     22 /* subclause 8.5.6 */
     23 void Intra16DCTrans(int16 *block, int Qq, int Rq)
     24 {
     25     int m0, m1, m2, m3;
     26     int j, offset;
     27     int16 *inout;
     28     int scale = dequant_coefres[Rq][0];
     29 
     30     inout = block;
     31     for (j = 0; j < 4; j++)
     32     {
     33         m0 = inout[0] + inout[4];
     34         m1 = inout[0] - inout[4];
     35         m2 = inout[8] + inout[12];
     36         m3 = inout[8] - inout[12];
     37 
     38 
     39         inout[0] = m0 + m2;
     40         inout[4] = m0 - m2;
     41         inout[8] = m1 - m3;
     42         inout[12] = m1 + m3;
     43         inout += 64;
     44     }
     45 
     46     inout = block;
     47 
     48     if (Qq >= 2)  /* this way should be faster than JM */
     49     {           /* they use (((m4*scale)<<(QPy/6))+2)>>2 for both cases. */
     50         Qq -= 2;
     51         for (j = 0; j < 4; j++)
     52         {
     53             m0 = inout[0] + inout[64];
     54             m1 = inout[0] - inout[64];
     55             m2 = inout[128] + inout[192];
     56             m3 = inout[128] - inout[192];
     57 
     58             inout[0] = ((m0 + m2) * scale) << Qq;
     59             inout[64] = ((m0 - m2) * scale) << Qq;
     60             inout[128] = ((m1 - m3) * scale) << Qq;
     61             inout[192] = ((m1 + m3) * scale) << Qq;
     62             inout += 4;
     63         }
     64     }
     65     else
     66     {
     67         Qq = 2 - Qq;
     68         offset = 1 << (Qq - 1);
     69 
     70         for (j = 0; j < 4; j++)
     71         {
     72             m0 = inout[0] + inout[64];
     73             m1 = inout[0] - inout[64];
     74             m2 = inout[128] + inout[192];
     75             m3 = inout[128] - inout[192];
     76 
     77             inout[0] = (((m0 + m2) * scale + offset) >> Qq);
     78             inout[64] = (((m0 - m2) * scale + offset) >> Qq);
     79             inout[128] = (((m1 - m3) * scale + offset) >> Qq);
     80             inout[192] = (((m1 + m3) * scale + offset) >> Qq);
     81             inout += 4;
     82         }
     83     }
     84 
     85     return ;
     86 }
     87 
     88 /* see subclase 8.5.8 */
     89 void itrans(int16 *block, uint8 *pred, uint8 *cur, int width)
     90 {
     91     int e0, e1, e2, e3; /* note, at every step of the calculation, these values */
     92     /* shall never exceed 16bit sign value, but we don't check */
     93     int i;           /* to save the cycles. */
     94     int16 *inout;
     95 
     96     inout = block;
     97 
     98     for (i = 4; i > 0; i--)
     99     {
    100         e0 = inout[0] + inout[2];
    101         e1 = inout[0] - inout[2];
    102         e2 = (inout[1] >> 1) - inout[3];
    103         e3 = inout[1] + (inout[3] >> 1);
    104 
    105         inout[0] = e0 + e3;
    106         inout[1] = e1 + e2;
    107         inout[2] = e1 - e2;
    108         inout[3] = e0 - e3;
    109 
    110         inout += 16;
    111     }
    112 
    113     for (i = 4; i > 0; i--)
    114     {
    115         e0 = block[0] + block[32];
    116         e1 = block[0] - block[32];
    117         e2 = (block[16] >> 1) - block[48];
    118         e3 = block[16] + (block[48] >> 1);
    119 
    120         e0 += e3;
    121         e3 = (e0 - (e3 << 1)); /* e0-e3 */
    122         e1 += e2;
    123         e2 = (e1 - (e2 << 1)); /* e1-e2 */
    124         e0 += 32;
    125         e1 += 32;
    126         e2 += 32;
    127         e3 += 32;
    128 #ifdef USE_PRED_BLOCK
    129         e0 = pred[0] + (e0 >> 6);
    130         if ((uint)e0 > 0xFF)   e0 = 0xFF & (~(e0 >> 31));  /* clip */
    131         e1 = pred[20] + (e1 >> 6);
    132         if ((uint)e1 > 0xFF)   e1 = 0xFF & (~(e1 >> 31));  /* clip */
    133         e2 = pred[40] + (e2 >> 6);
    134         if ((uint)e2 > 0xFF)   e2 = 0xFF & (~(e2 >> 31));  /* clip */
    135         e3 = pred[60] + (e3 >> 6);
    136         if ((uint)e3 > 0xFF)   e3 = 0xFF & (~(e3 >> 31));  /* clip */
    137         *cur = e0;
    138         *(cur += width) = e1;
    139         *(cur += width) = e2;
    140         cur[width] = e3;
    141         cur -= (width << 1);
    142         cur++;
    143         pred++;
    144 #else
    145         OSCL_UNUSED_ARG(pred);
    146 
    147         e0 = *cur + (e0 >> 6);
    148         if ((uint)e0 > 0xFF)   e0 = 0xFF & (~(e0 >> 31));  /* clip */
    149         *cur = e0;
    150         e1 = *(cur += width) + (e1 >> 6);
    151         if ((uint)e1 > 0xFF)   e1 = 0xFF & (~(e1 >> 31));  /* clip */
    152         *cur = e1;
    153         e2 = *(cur += width) + (e2 >> 6);
    154         if ((uint)e2 > 0xFF)   e2 = 0xFF & (~(e2 >> 31));  /* clip */
    155         *cur = e2;
    156         e3 = cur[width] + (e3 >> 6);
    157         if ((uint)e3 > 0xFF)   e3 = 0xFF & (~(e3 >> 31));  /* clip */
    158         cur[width] = e3;
    159         cur -= (width << 1);
    160         cur++;
    161 #endif
    162         block++;
    163     }
    164 
    165     return ;
    166 }
    167 
    168 /* see subclase 8.5.8 */
    169 void ictrans(int16 *block, uint8 *pred, uint8 *cur, int width)
    170 {
    171     int e0, e1, e2, e3; /* note, at every step of the calculation, these values */
    172     /* shall never exceed 16bit sign value, but we don't check */
    173     int i;           /* to save the cycles. */
    174     int16 *inout;
    175 
    176     inout = block;
    177 
    178     for (i = 4; i > 0; i--)
    179     {
    180         e0 = inout[0] + inout[2];
    181         e1 = inout[0] - inout[2];
    182         e2 = (inout[1] >> 1) - inout[3];
    183         e3 = inout[1] + (inout[3] >> 1);
    184 
    185         inout[0] = e0 + e3;
    186         inout[1] = e1 + e2;
    187         inout[2] = e1 - e2;
    188         inout[3] = e0 - e3;
    189 
    190         inout += 16;
    191     }
    192 
    193     for (i = 4; i > 0; i--)
    194     {
    195         e0 = block[0] + block[32];
    196         e1 = block[0] - block[32];
    197         e2 = (block[16] >> 1) - block[48];
    198         e3 = block[16] + (block[48] >> 1);
    199 
    200         e0 += e3;
    201         e3 = (e0 - (e3 << 1)); /* e0-e3 */
    202         e1 += e2;
    203         e2 = (e1 - (e2 << 1)); /* e1-e2 */
    204         e0 += 32;
    205         e1 += 32;
    206         e2 += 32;
    207         e3 += 32;
    208 #ifdef USE_PRED_BLOCK
    209         e0 = pred[0] + (e0 >> 6);
    210         if ((uint)e0 > 0xFF)   e0 = 0xFF & (~(e0 >> 31));  /* clip */
    211         e1 = pred[12] + (e1 >> 6);
    212         if ((uint)e1 > 0xFF)   e1 = 0xFF & (~(e1 >> 31));  /* clip */
    213         e2 = pred[24] + (e2 >> 6);
    214         if ((uint)e2 > 0xFF)   e2 = 0xFF & (~(e2 >> 31));  /* clip */
    215         e3 = pred[36] + (e3 >> 6);
    216         if ((uint)e3 > 0xFF)   e3 = 0xFF & (~(e3 >> 31));  /* clip */
    217         *cur = e0;
    218         *(cur += width) = e1;
    219         *(cur += width) = e2;
    220         cur[width] = e3;
    221         cur -= (width << 1);
    222         cur++;
    223         pred++;
    224 #else
    225         OSCL_UNUSED_ARG(pred);
    226 
    227         e0 = *cur + (e0 >> 6);
    228         if ((uint)e0 > 0xFF)   e0 = 0xFF & (~(e0 >> 31));  /* clip */
    229         *cur = e0;
    230         e1 = *(cur += width) + (e1 >> 6);
    231         if ((uint)e1 > 0xFF)   e1 = 0xFF & (~(e1 >> 31));  /* clip */
    232         *cur = e1;
    233         e2 = *(cur += width) + (e2 >> 6);
    234         if ((uint)e2 > 0xFF)   e2 = 0xFF & (~(e2 >> 31));  /* clip */
    235         *cur = e2;
    236         e3 = cur[width] + (e3 >> 6);
    237         if ((uint)e3 > 0xFF)   e3 = 0xFF & (~(e3 >> 31));  /* clip */
    238         cur[width] = e3;
    239         cur -= (width << 1);
    240         cur++;
    241 #endif
    242         block++;
    243     }
    244 
    245     return ;
    246 }
    247 
    248 /* see subclause 8.5.7 */
    249 void ChromaDCTrans(int16 *block, int Qq, int Rq)
    250 {
    251     int c00, c01, c10, c11;
    252     int f0, f1, f2, f3;
    253     int scale = dequant_coefres[Rq][0];
    254 
    255     c00 = block[0] + block[4];
    256     c01 = block[0] - block[4];
    257     c10 = block[64] + block[68];
    258     c11 = block[64] - block[68];
    259 
    260     f0 = c00 + c10;
    261     f1 = c01 + c11;
    262     f2 = c00 - c10;
    263     f3 = c01 - c11;
    264 
    265     if (Qq >= 1)
    266     {
    267         Qq -= 1;
    268         block[0] = (f0 * scale) << Qq;
    269         block[4] = (f1 * scale) << Qq;
    270         block[64] = (f2 * scale) << Qq;
    271         block[68] = (f3 * scale) << Qq;
    272     }
    273     else
    274     {
    275         block[0] = (f0 * scale) >> 1;
    276         block[4] = (f1 * scale) >> 1;
    277         block[64] = (f2 * scale) >> 1;
    278         block[68] = (f3 * scale) >> 1;
    279     }
    280 
    281     return ;
    282 }
    283 
    284 
    285 void copy_block(uint8 *pred, uint8 *cur, int width, int pred_pitch)
    286 {
    287     uint32 temp;
    288 
    289     temp = *((uint32*)pred);
    290     pred += pred_pitch;
    291     *((uint32*)cur) = temp;
    292     cur += width;
    293     temp = *((uint32*)pred);
    294     pred += pred_pitch;
    295     *((uint32*)cur) = temp;
    296     cur += width;
    297     temp = *((uint32*)pred);
    298     pred += pred_pitch;
    299     *((uint32*)cur) = temp;
    300     cur += width;
    301     temp = *((uint32*)pred);
    302     *((uint32*)cur) = temp;
    303 
    304     return ;
    305 }
    306 
    307 
    308