Home | History | Annotate | Download | only in aacdec
      1 /* ------------------------------------------------------------------
      2  * Copyright (C) 1998-2009 PacketVideo
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
     13  * express or implied.
     14  * See the License for the specific language governing permissions
     15  * and limitations under the License.
     16  * -------------------------------------------------------------------
     17  */
     18 /*
     19 
     20  Pathname: ./src/fft_rx4_short.c
     21  Funtions: fft_rx4_short
     22 
     23 ------------------------------------------------------------------------------
     24  REVISION HISTORY
     25 
     26  Description:
     27             (1) Eliminated search for max in the main loop.
     28             (2) Simplified the function by eliminating different conditions
     29                 for exp.
     30             (3) Reduced precision on w_64rx4 from Q15 to Q12, so now the
     31                 input can be as high as 1.0 and saturation will not occurre
     32                 because the accumulation times the new Q12 format will never
     33                 exceed 31 bits.
     34 
     35  Description:
     36             (1) Added comment to explain max search elimination and
     37                 Q format during multiplications
     38             (2) Increased down shift from 1 to 2, to ensure that 32-bit
     39                 numbers will not overflow when 2 consecutive adds are done
     40                 This was found during code review.
     41 
     42  Who:                                   Date:
     43  Description:
     44 
     45 ------------------------------------------------------------------------------
     46  INPUT AND OUTPUT DEFINITIONS
     47 
     48  Inputs:
     49     Data       =  Input complex vector, arranged in the following order:
     50                   real, imag, real, imag...
     51                   This is a complex vector whose elements (real and Imag) are
     52                   Int32.
     53                   type Int32 *
     54 
     55     peak_value =  Input,  peak value of the input vector
     56                   Output,  peak value of the resulting vector
     57                   type Int32 *
     58 
     59  Local Stores/Buffers/Pointers Needed:
     60     None
     61 
     62  Global Stores/Buffers/Pointers Needed:
     63     None
     64 
     65  Outputs:
     66     exponent returns a shift to compensate the scaling introduced by
     67     overflow protection
     68 
     69  Pointers and Buffers Modified:
     70     calculation are done in-place and returned in Data
     71 
     72  Local Stores Modified:
     73     None
     74 
     75  Global Stores Modified:
     76     None
     77 
     78 ------------------------------------------------------------------------------
     79  FUNCTION DESCRIPTION
     80 
     81     Fast Fourier Transform, radix 4 with Decimation in Frequency and block
     82     floating point arithmetic.
     83     The radix-4 FFT  simply divides the FFT into four smaller FFTs. Each of
     84     the smaller FFTs is then further divided into smaller ones and so on.
     85     It consists of log 4 N stages and each stage consists of N/4 dragonflies.
     86 
     87     An FFT is nothing but a bundle of multiplications and summations which
     88     may overflow during calculations.
     89 
     90 
     91     This routine uses a scheme to test and scale the result output from
     92     each FFT stage in order to fix the accumulation overflow.
     93 
     94     The Input Data should be in Q13 format to get the highest precision.
     95     At the end of each dragonfly calculation, a test for possible bit growth
     96     is made, if bit growth is possible the Data is scale down back to Q13.
     97 
     98 
     99 ------------------------------------------------------------------------------
    100  REQUIREMENTS
    101 
    102     This function should provide a fixed point FFT for an input array
    103     of size 64.
    104 
    105 ------------------------------------------------------------------------------
    106  REFERENCES
    107 
    108     [1] Advance Digital Signal Processing, J. Proakis, C. Rader, F. Ling,
    109         C. Nikias, Macmillan Pub. Co.
    110 
    111 ------------------------------------------------------------------------------
    112  PSEUDO-CODE
    113 
    114 
    115    MODIFY( x[] )
    116    RETURN( exponent )
    117 
    118 ------------------------------------------------------------------------------
    119  RESOURCES USED
    120    When the code is written for a specific target processor the
    121      the resources used should be documented below.
    122 
    123  STACK USAGE: [stack count for this module] + [variable to represent
    124           stack usage for each subroutine called]
    125 
    126      where: [stack usage variable] = stack usage for [subroutine
    127          name] (see [filename].ext)
    128 
    129  DATA MEMORY USED: x words
    130 
    131  PROGRAM MEMORY USED: x words
    132 
    133  CLOCK CYCLES: [cycle count equation for this module] + [variable
    134            used to represent cycle count for each subroutine
    135            called]
    136 
    137      where: [cycle count variable] = cycle count for [subroutine
    138         name] (see [filename].ext)
    139 
    140 ------------------------------------------------------------------------------
    141 */
    142 /*----------------------------------------------------------------------------
    143 ; INCLUDES
    144 ----------------------------------------------------------------------------*/
    145 
    146 #include "pv_audio_type_defs.h"
    147 #include "fft_rx4.h"
    148 #include "pv_normalize.h"
    149 #include "fxp_mul32.h"
    150 
    151 /*----------------------------------------------------------------------------
    152 ; MACROS
    153 ; Define module specific macros here
    154 ----------------------------------------------------------------------------*/
    155 
    156 /*----------------------------------------------------------------------------
    157 ; DEFINES
    158 ; Include all pre-processor statements here. Include conditional
    159 ; compile variables also.
    160 ----------------------------------------------------------------------------*/
    161 
    162 /*----------------------------------------------------------------------------
    163 ; LOCAL FUNCTION DEFINITIONS
    164 ; Function Prototype declaration
    165 ----------------------------------------------------------------------------*/
    166 
    167 /*----------------------------------------------------------------------------
    168 ; LOCAL VARIABLE DEFINITIONS
    169 ; Variable declaration - defined here and used outside this module
    170 ----------------------------------------------------------------------------*/
    171 
    172 /*----------------------------------------------------------------------------
    173 ; EXTERNAL FUNCTION REFERENCES
    174 ; Declare functions defined elsewhere and referenced in this module
    175 ----------------------------------------------------------------------------*/
    176 
    177 /*----------------------------------------------------------------------------
    178 ; EXTERNAL VARIABLES REFERENCES
    179 ; Declare variables used in this module but defined elsewhere
    180 ----------------------------------------------------------------------------*/
    181 
    182 /*----------------------------------------------------------------------------
    183 ; EXTERNAL GLOBAL STORE/BUFFER/POINTER REFERENCES
    184 ; Declare variables used in this module but defined elsewhere
    185 ----------------------------------------------------------------------------*/
    186 
    187 /*----------------------------------------------------------------------------
    188 ; FUNCTION CODE
    189 ----------------------------------------------------------------------------*/
    190 
    191 Int fft_rx4_short(
    192     Int32      Data[],
    193     Int32      *peak_value)
    194 
    195 {
    196     Int     n1;
    197     Int     n2;
    198     Int     n3;
    199     Int     j;
    200     Int     k;
    201     Int     i;
    202     Int32   exp_jw1;
    203     Int32   exp_jw2;
    204     Int32   exp_jw3;
    205 
    206 
    207     Int32   t1;
    208     Int32   t2;
    209     Int32   r1;
    210     Int32   r2;
    211     Int32   r3;
    212     Int32   s1;
    213     Int32   s2;
    214     Int32   s3;
    215 
    216     Int32   *pData1;
    217     Int32   *pData2;
    218     Int32   *pData3;
    219     Int32   *pData4;
    220     const Int32  *pw;
    221     Int32   temp1;
    222     Int32   temp2;
    223     Int32   temp3;
    224     Int32   temp4;
    225     Int32   max;
    226     Int     exp;
    227     Int     exponent = 0;
    228     Int     shift;
    229 
    230 
    231     max = *peak_value;
    232     exp = 0;
    233 
    234     if (max > 0x008000)
    235     {
    236         exp = 8 - pv_normalize(max);   /* use 24 bits  */
    237 
    238         exponent = exp;        /* keeps track of # of shifts */
    239 
    240     }
    241 
    242     n2 = FFT_RX4_SHORT;
    243 
    244     pw = W_64rx4;
    245 
    246 
    247     /* shift down to avoid possible overflow in first pass of the loop */
    248     shift = 2;
    249 
    250     for (k = FFT_RX4_SHORT; k > 4; k >>= 2)
    251     {
    252 
    253         n1 = n2;
    254         n2 >>= 2;
    255         n3 = n1 >> 1;
    256 
    257         exp -= 2;
    258 
    259         for (i = 0; i < FFT_RX4_SHORT; i += n1)
    260         {
    261             pData1 = &Data[ i<<1];
    262             pData3 = pData1 + n3;
    263             pData2 = pData1 + n1;
    264             pData4 = pData3 + n1;
    265 
    266             temp1   = *(pData1);
    267             temp2   = *(pData2);
    268             temp1   >>= shift;
    269             temp2   >>= shift;
    270 
    271             r1      = temp1 + temp2;
    272             r2      = temp1 - temp2;
    273 
    274             temp3   = *(pData3++);
    275             temp4   = *(pData4++);
    276             temp3   >>= shift;
    277             temp4   >>= shift;
    278 
    279             t1      = temp3 + temp4;
    280             t2      = temp3 - temp4;
    281 
    282             *(pData1++) = (r1 + t1) >> exp;
    283             *(pData2++) = (r1 - t1) >> exp;
    284 
    285             temp1   = *pData1;
    286             temp2   = *pData2;
    287             temp1   >>= shift;
    288             temp2   >>= shift;
    289 
    290             s1      = temp1 + temp2;
    291             s2      = temp1 - temp2;
    292 
    293             temp3   = *pData3;
    294             temp4   = *pData4;
    295             temp3   >>= shift;
    296             temp4   >>= shift;
    297 
    298             t1      = temp3 + temp4;
    299             r1      = temp3 - temp4;
    300 
    301             *pData1   = (s1 + t1) >> exp;
    302             *pData2   = (s1 - t1) >> exp;
    303 
    304             *pData4--    = (s2 + t2) >> exp;
    305             *pData4      = (r2 - r1) >> exp;
    306 
    307             *pData3--    = (s2 - t2) >> exp;
    308             *pData3      = (r2 + r1) >> exp;
    309 
    310 
    311         }  /* i */
    312 
    313         for (j = 1; j < n2; j++)
    314         {
    315             exp_jw1 = *pw++;
    316             exp_jw2 = *pw++;
    317             exp_jw3 = *pw++;
    318 
    319 
    320             for (i = j; i < FFT_RX4_SHORT; i += n1)
    321             {
    322                 pData1 = &Data[ i<<1];
    323                 pData3 = pData1 + n3;
    324                 pData2 = pData1 + n1;
    325                 pData4 = pData3 + n1;
    326 
    327                 temp1   = *(pData1);
    328                 temp2   = *(pData2++);
    329                 temp1   >>= shift;
    330                 temp2   >>= shift;
    331 
    332                 r1      = temp1 + temp2;
    333                 r2      = temp1 - temp2;
    334                 temp3   = *(pData3++);
    335                 temp4   = *(pData4++);
    336                 temp3   >>= shift;
    337                 temp4   >>= shift;
    338 
    339                 t1      = temp3 + temp4;
    340                 t2      = temp3 - temp4;
    341 
    342                 *(pData1++) = (r1 + t1) >> exp;
    343                 r1          = (r1 - t1) >> exp;
    344 
    345                 temp1   = *pData1;
    346                 temp2   = *pData2;
    347                 temp1   >>= shift;
    348                 temp2   >>= shift;
    349 
    350                 s1      = temp1 + temp2;
    351                 s2      = temp1 - temp2;
    352 
    353                 s3      = (s2 + t2) >> exp;
    354                 s2      = (s2 - t2) >> exp;
    355 
    356                 temp3   = *pData3;
    357                 temp4   = *pData4 ;
    358                 temp3   >>= shift;
    359                 temp4   >>= shift;
    360 
    361                 t1      = temp3 + temp4;
    362                 t2      = temp3 - temp4;
    363 
    364                 *pData1  = (s1 + t1) >> exp;
    365                 s1       = (s1 - t1) >> exp;
    366 
    367 
    368                 *pData2--  = cmplx_mul32_by_16(s1, -r1, exp_jw2) << 1;
    369                 *pData2    = cmplx_mul32_by_16(r1,  s1, exp_jw2) << 1;
    370 
    371                 r3       = ((r2 - t2) >> exp);
    372                 r2       = ((r2 + t2) >> exp);
    373 
    374                 *pData3--  = cmplx_mul32_by_16(s2, -r2, exp_jw1) << 1;
    375                 *pData3    = cmplx_mul32_by_16(r2,  s2, exp_jw1) << 1;
    376 
    377                 *pData4--  = cmplx_mul32_by_16(s3, -r3, exp_jw3) << 1;
    378                 *pData4    = cmplx_mul32_by_16(r3,  s3, exp_jw3) << 1;
    379 
    380             }  /* i */
    381 
    382         }  /*  j */
    383 
    384         /*
    385          *  this will reset exp and shift to zero for the second pass of the
    386          *  loop
    387          */
    388         exp   = 2;
    389         shift = 0;
    390 
    391     } /* k */
    392 
    393 
    394     max = 0;
    395 
    396     pData1 = Data - 7;
    397 
    398     for (i = ONE_FOURTH_FFT_RX4_SHORT; i != 0 ; i--)
    399     {
    400         pData1 += 7;
    401 
    402         pData3 = pData1 + 2;
    403         pData2 = pData1 + 4;
    404         pData4 = pData1 + 6;
    405 
    406         temp1   = *pData1;
    407         temp2   = *pData2++;
    408 
    409         r1      = temp1 + temp2;
    410         r2      = temp1 - temp2;
    411 
    412         temp1   = *pData3++;
    413         temp2   = *pData4++;
    414 
    415         t1      = temp1 + temp2;
    416         t2      = temp1 - temp2;
    417 
    418         temp1       = (r1 + t1);
    419         r1          = (r1 - t1);
    420         *(pData1++) = temp1;
    421         max        |= (temp1 >> 31) ^ temp1;
    422 
    423 
    424 
    425         temp1   = *pData1;
    426         temp2   = *pData2;
    427 
    428         s1      = temp1 + temp2;
    429         s2      = temp1 - temp2;
    430 
    431         s3      = (s2 + t2);
    432         s2      = (s2 - t2);
    433 
    434         temp1   = *pData3;
    435         temp2   = *pData4;
    436 
    437         t1      = temp1 + temp2;
    438         t2      = temp1 - temp2;
    439 
    440         temp1      = (s1 + t1);
    441         temp2      = (s1 - t1);
    442         *pData1    = temp1;
    443         *pData2--  = temp2;
    444         max       |= (temp1 >> 31) ^ temp1;
    445         max       |= (temp2 >> 31) ^ temp2;
    446 
    447         *pData2    = r1;
    448         *pData3--  = s2;
    449         *pData4--  = s3;
    450         max       |= (r1 >> 31) ^ r1;
    451         max       |= (s2 >> 31) ^ s2;
    452         max       |= (s3 >> 31) ^ s3;
    453 
    454         temp1      = (r2 - t2);
    455         temp2      = (r2 + t2);
    456         *pData4    = temp1;
    457         *pData3    = temp2;
    458         max       |= (temp1 >> 31) ^ temp1;
    459         max       |= (temp2 >> 31) ^ temp2;
    460 
    461     }  /* i */
    462 
    463     *peak_value = max;
    464 
    465 
    466     return (exponent);
    467 
    468 }
    469