Home | History | Annotate | Download | only in src
      1 /* ------------------------------------------------------------------
      2  * Copyright (C) 1998-2009 PacketVideo
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
     13  * express or implied.
     14  * See the License for the specific language governing permissions
     15  * and limitations under the License.
     16  * -------------------------------------------------------------------
     17  */
     18 #include "mp4def.h"
     19 #include "idct.h"
     20 #include "motion_comp.h"
     21 
     22 #ifdef FAST_IDCT
     23 
     24 /****************************************************************
     25 *       vca_idct.c : created 6/1/99 for several options
     26 *                     of hard-coded reduced idct function (using nz_coefs)
     27 ******************************************************************/
     28 
     29 /*****************************************************/
     30 //pretested version
     31 void idctrow0(int16 *, uint8 *, uint8 *, int)
     32 {
     33     return ;
     34 }
     35 void idctcol0(int16 *)
     36 {
     37     return ;
     38 }
     39 
     40 void idctrow1(int16 *blk, uint8 *pred, uint8 *dst, int width)
     41 {
     42     /* shortcut */
     43     int tmp;
     44     int i = 8;
     45     uint32 pred_word, dst_word;
     46     int res, res2;
     47 
     48     /* preset the offset, such that we can take advantage pre-offset addressing mode   */
     49     width -= 4;
     50     dst -= width;
     51     pred -= 12;
     52     blk -= 8;
     53 
     54     while (i--)
     55     {
     56         tmp = (*(blk += 8) + 32) >> 6;
     57         *blk = 0;
     58 
     59         pred_word = *((uint32*)(pred += 12)); /* read 4 bytes from pred */
     60         res = tmp + (pred_word & 0xFF);
     61         CLIP_RESULT(res);
     62         res2 = tmp + ((pred_word >> 8) & 0xFF);
     63         CLIP_RESULT(res2);
     64         dst_word = (res2 << 8) | res;
     65         res = tmp + ((pred_word >> 16) & 0xFF);
     66         CLIP_RESULT(res);
     67         dst_word |= (res << 16);
     68         res = tmp + ((pred_word >> 24) & 0xFF);
     69         CLIP_RESULT(res);
     70         dst_word |= (res << 24);
     71         *((uint32*)(dst += width)) = dst_word; /* save 4 bytes to dst */
     72 
     73         pred_word = *((uint32*)(pred += 4)); /* read 4 bytes from pred */
     74         res = tmp + (pred_word & 0xFF);
     75         CLIP_RESULT(res);
     76         res2 = tmp + ((pred_word >> 8) & 0xFF);
     77         CLIP_RESULT(res2);
     78         dst_word = (res2 << 8) | res;
     79         res = tmp + ((pred_word >> 16) & 0xFF);
     80         CLIP_RESULT(res);
     81         dst_word |= (res << 16);
     82         res = tmp + ((pred_word >> 24) & 0xFF);
     83         CLIP_RESULT(res);
     84         dst_word |= (res << 24);
     85         *((uint32*)(dst += 4)) = dst_word; /* save 4 bytes to dst */
     86     }
     87     return;
     88 }
     89 
     90 void idctcol1(int16 *blk)
     91 { /* shortcut */
     92     blk[0] = blk[8] = blk[16] = blk[24] = blk[32] = blk[40] = blk[48] = blk[56] =
     93                                               blk[0] << 3;
     94     return;
     95 }
     96 
     97 void idctrow2(int16 *blk, uint8 *pred, uint8 *dst, int width)
     98 {
     99     int32 x0, x1, x2, x4, x5;
    100     int i = 8;
    101     uint32 pred_word, dst_word;
    102     int res, res2;
    103 
    104     /* preset the offset, such that we can take advantage pre-offset addressing mode   */
    105     width -= 4;
    106     dst -= width;
    107     pred -= 12;
    108     blk -= 8;
    109 
    110     while (i--)
    111     {
    112         /* shortcut */
    113         x4 = blk[9];
    114         blk[9] = 0;
    115         x0 = ((*(blk += 8)) << 8) + 8192;
    116         *blk = 0;  /* for proper rounding in the fourth stage */
    117 
    118         /* first stage */
    119         x5 = (W7 * x4 + 4) >> 3;
    120         x4 = (W1 * x4 + 4) >> 3;
    121 
    122         /* third stage */
    123         x2 = (181 * (x4 + x5) + 128) >> 8;
    124         x1 = (181 * (x4 - x5) + 128) >> 8;
    125 
    126         /* fourth stage */
    127         pred_word = *((uint32*)(pred += 12)); /* read 4 bytes from pred */
    128         res = (x0 + x4) >> 14;
    129         ADD_AND_CLIP1(res);
    130         res2 = (x0 + x2) >> 14;
    131         ADD_AND_CLIP2(res2);
    132         dst_word = (res2 << 8) | res;
    133         res = (x0 + x1) >> 14;
    134         ADD_AND_CLIP3(res);
    135         dst_word |= (res << 16);
    136         res = (x0 + x5) >> 14;
    137         ADD_AND_CLIP4(res);
    138         dst_word |= (res << 24);
    139         *((uint32*)(dst += width)) = dst_word; /* save 4 bytes to dst */
    140 
    141         pred_word = *((uint32*)(pred += 4)); /* read 4 bytes from pred */
    142         res = (x0 - x5) >> 14;
    143         ADD_AND_CLIP1(res);
    144         res2 = (x0 - x1) >> 14;
    145         ADD_AND_CLIP2(res2);
    146         dst_word = (res2 << 8) | res;
    147         res = (x0 - x2) >> 14;
    148         ADD_AND_CLIP3(res);
    149         dst_word |= (res << 16);
    150         res = (x0 - x4) >> 14;
    151         ADD_AND_CLIP4(res);
    152         dst_word |= (res << 24);
    153         *((uint32*)(dst += 4)) = dst_word; /* save 4 bytes to dst */
    154     }
    155     return ;
    156 }
    157 
    158 void idctcol2(int16 *blk)
    159 {
    160     int32 x0, x1, x3, x5, x7;//, x8;
    161 
    162     x1 = blk[8];
    163     x0 = ((int32)blk[0] << 11) + 128;
    164     /* both upper and lower*/
    165 
    166     x7 = W7 * x1;
    167     x1 = W1 * x1;
    168 
    169     x3 = x7;
    170     x5 = (181 * (x1 - x7) + 128) >> 8;
    171     x7 = (181 * (x1 + x7) + 128) >> 8;
    172 
    173     blk[0] = (x0 + x1) >> 8;
    174     blk[8] = (x0 + x7) >> 8;
    175     blk[16] = (x0 + x5) >> 8;
    176     blk[24] = (x0 + x3) >> 8;
    177     blk[56] = (x0 - x1) >> 8;
    178     blk[48] = (x0 - x7) >> 8;
    179     blk[40] = (x0 - x5) >> 8;
    180     blk[32] = (x0 - x3) >> 8;
    181 
    182     return ;
    183 }
    184 
    185 void idctrow3(int16 *blk, uint8 *pred, uint8 *dst, int width)
    186 {
    187     int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
    188     int i = 8;
    189     uint32 pred_word, dst_word;
    190     int res, res2;
    191 
    192     /* preset the offset, such that we can take advantage pre-offset addressing mode   */
    193     width -= 4;
    194     dst -= width;
    195     pred -= 12;
    196     blk -= 8;
    197 
    198     while (i--)
    199     {
    200         x2 = blk[10];
    201         blk[10] = 0;
    202         x1 = blk[9];
    203         blk[9] = 0;
    204         x0 = ((*(blk += 8)) << 8) + 8192;
    205         *blk = 0;   /* for proper rounding in the fourth stage */
    206         /* both upper and lower*/
    207         /* both x2orx6 and x0orx4 */
    208 
    209         x4 = x0;
    210         x6 = (W6 * x2 + 4) >> 3;
    211         x2 = (W2 * x2 + 4) >> 3;
    212         x8 = x0 - x2;
    213         x0 += x2;
    214         x2 = x8;
    215         x8 = x4 - x6;
    216         x4 += x6;
    217         x6 = x8;
    218 
    219         x7 = (W7 * x1 + 4) >> 3;
    220         x1 = (W1 * x1 + 4) >> 3;
    221         x3 = x7;
    222         x5 = (181 * (x1 - x7) + 128) >> 8;
    223         x7 = (181 * (x1 + x7) + 128) >> 8;
    224 
    225         pred_word = *((uint32*)(pred += 12)); /* read 4 bytes from pred */
    226         res = (x0 + x1) >> 14;
    227         ADD_AND_CLIP1(res);
    228         res2 = (x4 + x7) >> 14;
    229         ADD_AND_CLIP2(res2);
    230         dst_word = (res2 << 8) | res;
    231         res = (x6 + x5) >> 14;
    232         ADD_AND_CLIP3(res);
    233         dst_word |= (res << 16);
    234         res = (x2 + x3) >> 14;
    235         ADD_AND_CLIP4(res);
    236         dst_word |= (res << 24);
    237         *((uint32*)(dst += width)) = dst_word; /* save 4 bytes to dst */
    238 
    239         pred_word = *((uint32*)(pred += 4)); /* read 4 bytes from pred */
    240         res = (x2 - x3) >> 14;
    241         ADD_AND_CLIP1(res);
    242         res2 = (x6 - x5) >> 14;
    243         ADD_AND_CLIP2(res2);
    244         dst_word = (res2 << 8) | res;
    245         res = (x4 - x7) >> 14;
    246         ADD_AND_CLIP3(res);
    247         dst_word |= (res << 16);
    248         res = (x0 - x1) >> 14;
    249         ADD_AND_CLIP4(res);
    250         dst_word |= (res << 24);
    251         *((uint32*)(dst += 4)) = dst_word; /* save 4 bytes to dst */
    252     }
    253 
    254     return ;
    255 }
    256 
    257 void idctcol3(int16 *blk)
    258 {
    259     int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
    260 
    261     x2 = blk[16];
    262     x1 = blk[8];
    263     x0 = ((int32)blk[0] << 11) + 128;
    264 
    265     x4 = x0;
    266     x6 = W6 * x2;
    267     x2 = W2 * x2;
    268     x8 = x0 - x2;
    269     x0 += x2;
    270     x2 = x8;
    271     x8 = x4 - x6;
    272     x4 += x6;
    273     x6 = x8;
    274 
    275     x7 = W7 * x1;
    276     x1 = W1 * x1;
    277     x3 = x7;
    278     x5 = (181 * (x1 - x7) + 128) >> 8;
    279     x7 = (181 * (x1 + x7) + 128) >> 8;
    280 
    281     blk[0] = (x0 + x1) >> 8;
    282     blk[8] = (x4 + x7) >> 8;
    283     blk[16] = (x6 + x5) >> 8;
    284     blk[24] = (x2 + x3) >> 8;
    285     blk[56] = (x0 - x1) >> 8;
    286     blk[48] = (x4 - x7) >> 8;
    287     blk[40] = (x6 - x5) >> 8;
    288     blk[32] = (x2 - x3) >> 8;
    289 
    290     return;
    291 }
    292 
    293 
    294 void idctrow4(int16 *blk, uint8 *pred, uint8 *dst, int width)
    295 {
    296     int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
    297     int i = 8;
    298     uint32 pred_word, dst_word;
    299     int res, res2;
    300 
    301     /* preset the offset, such that we can take advantage pre-offset addressing mode   */
    302     width -= 4;
    303     dst -= width;
    304     pred -= 12;
    305     blk -= 8;
    306 
    307     while (i--)
    308     {
    309         x2 = blk[10];
    310         blk[10] = 0;
    311         x1 = blk[9];
    312         blk[9] = 0;
    313         x3 = blk[11];
    314         blk[11] = 0;
    315         x0 = ((*(blk += 8)) << 8) + 8192;
    316         *blk = 0;    /* for proper rounding in the fourth stage */
    317 
    318         x4 = x0;
    319         x6 = (W6 * x2 + 4) >> 3;
    320         x2 = (W2 * x2 + 4) >> 3;
    321         x8 = x0 - x2;
    322         x0 += x2;
    323         x2 = x8;
    324         x8 = x4 - x6;
    325         x4 += x6;
    326         x6 = x8;
    327 
    328         x7 = (W7 * x1 + 4) >> 3;
    329         x1 = (W1 * x1 + 4) >> 3;
    330         x5 = (W3 * x3 + 4) >> 3;
    331         x3 = (- W5 * x3 + 4) >> 3;
    332         x8 = x1 - x5;
    333         x1 += x5;
    334         x5 = x8;
    335         x8 = x7 - x3;
    336         x3 += x7;
    337         x7 = (181 * (x5 + x8) + 128) >> 8;
    338         x5 = (181 * (x5 - x8) + 128) >> 8;
    339 
    340         pred_word = *((uint32*)(pred += 12)); /* read 4 bytes from pred */
    341         res = (x0 + x1) >> 14;
    342         ADD_AND_CLIP1(res);
    343         res2 = (x4 + x7) >> 14;
    344         ADD_AND_CLIP2(res2);
    345         dst_word = (res2 << 8) | res;
    346         res = (x6 + x5) >> 14;
    347         ADD_AND_CLIP3(res);
    348         dst_word |= (res << 16);
    349         res = (x2 + x3) >> 14;
    350         ADD_AND_CLIP4(res);
    351         dst_word |= (res << 24);
    352         *((uint32*)(dst += width)) = dst_word; /* save 4 bytes to dst */
    353 
    354         pred_word = *((uint32*)(pred += 4)); /* read 4 bytes from pred */
    355         res = (x2 - x3) >> 14;
    356         ADD_AND_CLIP1(res);
    357         res2 = (x6 - x5) >> 14;
    358         ADD_AND_CLIP2(res2);
    359         dst_word = (res2 << 8) | res;
    360         res = (x4 - x7) >> 14;
    361         ADD_AND_CLIP3(res);
    362         dst_word |= (res << 16);
    363         res = (x0 - x1) >> 14;
    364         ADD_AND_CLIP4(res);
    365         dst_word |= (res << 24);
    366         *((uint32*)(dst += 4)) = dst_word; /* save 4 bytes to dst */
    367     }
    368     return ;
    369 }
    370 
    371 void idctcol4(int16 *blk)
    372 {
    373     int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
    374     x2 = blk[16];
    375     x1 = blk[8];
    376     x3 = blk[24];
    377     x0 = ((int32)blk[0] << 11) + 128;
    378 
    379     x4 = x0;
    380     x6 = W6 * x2;
    381     x2 = W2 * x2;
    382     x8 = x0 - x2;
    383     x0 += x2;
    384     x2 = x8;
    385     x8 = x4 - x6;
    386     x4 += x6;
    387     x6 = x8;
    388 
    389     x7 = W7 * x1;
    390     x1 = W1 * x1;
    391     x5 = W3 * x3;
    392     x3 = -W5 * x3;
    393     x8 = x1 - x5;
    394     x1 += x5;
    395     x5 = x8;
    396     x8 = x7 - x3;
    397     x3 += x7;
    398     x7 = (181 * (x5 + x8) + 128) >> 8;
    399     x5 = (181 * (x5 - x8) + 128) >> 8;
    400 
    401 
    402     blk[0] = (x0 + x1) >> 8;
    403     blk[8] = (x4 + x7) >> 8;
    404     blk[16] = (x6 + x5) >> 8;
    405     blk[24] = (x2 + x3) >> 8;
    406     blk[56] = (x0 - x1) >> 8;
    407     blk[48] = (x4 - x7) >> 8;
    408     blk[40] = (x6 - x5) >> 8;
    409     blk[32] = (x2 - x3) >> 8;
    410 
    411     return ;
    412 }
    413 
    414 void idctrow0_intra(int16 *, PIXEL *, int)
    415 {
    416     return ;
    417 }
    418 
    419 void idctrow1_intra(int16 *blk, PIXEL *comp, int width)
    420 {
    421     /* shortcut */
    422     int32 tmp;
    423     int i = 8;
    424     int offset = width;
    425     uint32 word;
    426 
    427     comp -= offset;
    428     while (i--)
    429     {
    430         tmp = ((blk[0] + 32) >> 6);
    431         blk[0] = 0;
    432         CLIP_RESULT(tmp)
    433 
    434         word = (tmp << 8) | tmp;
    435         word = (word << 16) | word;
    436 
    437         *((uint32*)(comp += offset)) = word;
    438         *((uint32*)(comp + 4)) = word;
    439 
    440 
    441 
    442 
    443         blk += B_SIZE;
    444     }
    445     return;
    446 }
    447 
    448 void idctrow2_intra(int16 *blk, PIXEL *comp, int width)
    449 {
    450     int32 x0, x1, x2, x4, x5, temp;
    451     int i = 8;
    452     int offset = width;
    453     int32 word;
    454 
    455     comp -= offset;
    456     while (i--)
    457     {
    458         /* shortcut */
    459         x4 = blk[1];
    460         blk[1] = 0;
    461         x0 = ((int32)blk[0] << 8) + 8192;
    462         blk[0] = 0;   /* for proper rounding in the fourth stage */
    463 
    464         /* first stage */
    465         x5 = (W7 * x4 + 4) >> 3;
    466         x4 = (W1 * x4 + 4) >> 3;
    467 
    468         /* third stage */
    469         x2 = (181 * (x4 + x5) + 128) >> 8;
    470         x1 = (181 * (x4 - x5) + 128) >> 8;
    471 
    472         /* fourth stage */
    473         word = ((x0 + x4) >> 14);
    474         CLIP_RESULT(word)
    475 
    476         temp = ((x0 + x2) >> 14);
    477         CLIP_RESULT(temp)
    478         word = word | (temp << 8);
    479         temp = ((x0 + x1) >> 14);
    480         CLIP_RESULT(temp)
    481         word = word | (temp << 16);
    482         temp = ((x0 + x5) >> 14);
    483         CLIP_RESULT(temp)
    484         word = word | (temp << 24);
    485         *((int32*)(comp += offset)) = word;
    486 
    487         word = ((x0 - x5) >> 14);
    488         CLIP_RESULT(word)
    489         temp = ((x0 - x1) >> 14);
    490         CLIP_RESULT(temp)
    491         word = word | (temp << 8);
    492         temp = ((x0 - x2) >> 14);
    493         CLIP_RESULT(temp)
    494         word = word | (temp << 16);
    495         temp = ((x0 - x4) >> 14);
    496         CLIP_RESULT(temp)
    497         word = word | (temp << 24);
    498         *((int32*)(comp + 4)) = word;
    499 
    500         blk += B_SIZE;
    501     }
    502     return ;
    503 }
    504 
    505 void idctrow3_intra(int16 *blk, PIXEL *comp, int width)
    506 {
    507     int32 x0, x1, x2, x3, x4, x5, x6, x7, x8, temp;
    508     int i = 8;
    509     int offset = width;
    510     int32 word;
    511 
    512     comp -= offset;
    513 
    514     while (i--)
    515     {
    516         x2 = blk[2];
    517         blk[2] = 0;
    518         x1 = blk[1];
    519         blk[1] = 0;
    520         x0 = ((int32)blk[0] << 8) + 8192;
    521         blk[0] = 0;/* for proper rounding in the fourth stage */
    522         /* both upper and lower*/
    523         /* both x2orx6 and x0orx4 */
    524 
    525         x4 = x0;
    526         x6 = (W6 * x2 + 4) >> 3;
    527         x2 = (W2 * x2 + 4) >> 3;
    528         x8 = x0 - x2;
    529         x0 += x2;
    530         x2 = x8;
    531         x8 = x4 - x6;
    532         x4 += x6;
    533         x6 = x8;
    534 
    535         x7 = (W7 * x1 + 4) >> 3;
    536         x1 = (W1 * x1 + 4) >> 3;
    537         x3 = x7;
    538         x5 = (181 * (x1 - x7) + 128) >> 8;
    539         x7 = (181 * (x1 + x7) + 128) >> 8;
    540 
    541         word = ((x0 + x1) >> 14);
    542         CLIP_RESULT(word)
    543         temp = ((x4 + x7) >> 14);
    544         CLIP_RESULT(temp)
    545         word = word | (temp << 8);
    546 
    547 
    548         temp = ((x6 + x5) >> 14);
    549         CLIP_RESULT(temp)
    550         word = word | (temp << 16);
    551 
    552         temp = ((x2 + x3) >> 14);
    553         CLIP_RESULT(temp)
    554         word = word | (temp << 24);
    555         *((int32*)(comp += offset)) = word;
    556 
    557         word = ((x2 - x3) >> 14);
    558         CLIP_RESULT(word)
    559 
    560         temp = ((x6 - x5) >> 14);
    561         CLIP_RESULT(temp)
    562         word = word | (temp << 8);
    563 
    564         temp = ((x4 - x7) >> 14);
    565         CLIP_RESULT(temp)
    566         word = word | (temp << 16);
    567 
    568         temp = ((x0 - x1) >> 14);
    569         CLIP_RESULT(temp)
    570         word = word | (temp << 24);
    571         *((int32*)(comp + 4)) = word;
    572 
    573         blk += B_SIZE;
    574     }
    575     return ;
    576 }
    577 
    578 void idctrow4_intra(int16 *blk, PIXEL *comp, int width)
    579 {
    580     int32 x0, x1, x2, x3, x4, x5, x6, x7, x8, temp;
    581     int i = 8;
    582     int offset = width;
    583     int32 word;
    584 
    585     comp -= offset;
    586 
    587     while (i--)
    588     {
    589         x2 = blk[2];
    590         blk[2] = 0;
    591         x1 = blk[1];
    592         blk[1] = 0;
    593         x3 = blk[3];
    594         blk[3] = 0;
    595         x0 = ((int32)blk[0] << 8) + 8192;
    596         blk[0] = 0;/* for proper rounding in the fourth stage */
    597 
    598         x4 = x0;
    599         x6 = (W6 * x2 + 4) >> 3;
    600         x2 = (W2 * x2 + 4) >> 3;
    601         x8 = x0 - x2;
    602         x0 += x2;
    603         x2 = x8;
    604         x8 = x4 - x6;
    605         x4 += x6;
    606         x6 = x8;
    607 
    608         x7 = (W7 * x1 + 4) >> 3;
    609         x1 = (W1 * x1 + 4) >> 3;
    610         x5 = (W3 * x3 + 4) >> 3;
    611         x3 = (- W5 * x3 + 4) >> 3;
    612         x8 = x1 - x5;
    613         x1 += x5;
    614         x5 = x8;
    615         x8 = x7 - x3;
    616         x3 += x7;
    617         x7 = (181 * (x5 + x8) + 128) >> 8;
    618         x5 = (181 * (x5 - x8) + 128) >> 8;
    619 
    620         word = ((x0 + x1) >> 14);
    621         CLIP_RESULT(word)
    622 
    623         temp = ((x4 + x7) >> 14);
    624         CLIP_RESULT(temp)
    625         word = word | (temp << 8);
    626 
    627 
    628         temp = ((x6 + x5) >> 14);
    629         CLIP_RESULT(temp)
    630         word = word | (temp << 16);
    631 
    632         temp = ((x2 + x3) >> 14);
    633         CLIP_RESULT(temp)
    634         word = word | (temp << 24);
    635         *((int32*)(comp += offset)) = word;
    636 
    637         word = ((x2 - x3) >> 14);
    638         CLIP_RESULT(word)
    639 
    640         temp = ((x6 - x5) >> 14);
    641         CLIP_RESULT(temp)
    642         word = word | (temp << 8);
    643 
    644         temp = ((x4 - x7) >> 14);
    645         CLIP_RESULT(temp)
    646         word = word | (temp << 16);
    647 
    648         temp = ((x0 - x1) >> 14);
    649         CLIP_RESULT(temp)
    650         word = word | (temp << 24);
    651         *((int32*)(comp + 4)) = word;
    652 
    653         blk += B_SIZE;
    654     }
    655 
    656     return ;
    657 }
    658 
    659 #endif
    660 
    661