Home | History | Annotate | Download | only in aacdec
      1 /* ------------------------------------------------------------------
      2  * Copyright (C) 1998-2009 PacketVideo
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
     13  * express or implied.
     14  * See the License for the specific language governing permissions
     15  * and limitations under the License.
     16  * -------------------------------------------------------------------
     17  */
     18 /*
     19 
     20  Pathname: ./src/fft_rx4_long.c
     21  Funtions: fft_rx4_long
     22 
     23 ------------------------------------------------------------------------------
     24  REVISION HISTORY
     25 
     26  Description:
     27             (1) Eliminated search for max in the main loop.
     28             (2) Reduced precision on w_256rx4 from Q15 to Q10
     29 
     30  Description:
     31             (1) Created function fft_rx4_long_no_max to overcome LTP problem.
     32 
     33  Description:
     34             (1) Modified shift so the accumulation growths faster than the
     35                 downshift, so now the input can be as high as 1.0 and saturation
     36                 will not occurre. The accumulation times the Q10 format will
     37                 never exceed 31 bits. This increases precision
     38             (2) Eliminated unneeded data moves, used before for max search.
     39             (3) Eliminated function fft_rx4_long_no_max.
     40 
     41  Description:
     42             (1) Added comment to explain max search elimination and
     43                 Q format during multiplications
     44 
     45  Who:                       Date:
     46  Description:
     47 
     48 ------------------------------------------------------------------------------
     49  INPUT AND OUTPUT DEFINITIONS
     50 
     51  Inputs:
     52     Data       =  Input complex vector, arranged in the following order:
     53                   real, imag, real, imag...
     54                   This is a complex vector whose elements (real and Imag) are
     55                   Int32.
     56                   type Int32 *
     57 
     58     peak_value =  Input,  peak value of the input vector
     59                   Output,  peak value of the resulting vector
     60                   type Int32 *
     61 
     62  Local Stores/Buffers/Pointers Needed:
     63     None
     64 
     65  Global Stores/Buffers/Pointers Needed:
     66     None
     67 
     68  Outputs:
     69     None
     70 
     71  Pointers and Buffers Modified:
     72     calculation are done in-place and returned in Data
     73 
     74  Local Stores Modified:
     75     None
     76 
     77  Global Stores Modified:
     78     None
     79 
     80 ------------------------------------------------------------------------------
     81  FUNCTION DESCRIPTION
     82 
     83     Fast Fourier Transform, radix 4 with Decimation in Frequency and block
     84     floating point arithmetic.
     85     The radix-4 FFT  simply divides the FFT into four smaller FFTs. Each of
     86     the smaller FFTs is then further divided into smaller ones and so on.
     87     It consists of log 4 N stages and each stage consists of N/4 dragonflies.
     88 
     89     An FFT is nothing but a bundle of multiplications and summations which
     90     may overflow during calculations.
     91 
     92 
     93     This routine uses a scheme to test and scale the result output from
     94     each FFT stage in order to fix the accumulation overflow.
     95 
     96     The Input Data should be in Q13 format to get the highest precision.
     97     At the end of each dragonfly calculation, a test for possible bit growth
     98     is made, if bit growth is possible the Data is scale down back to Q13.
     99 
    100 ------------------------------------------------------------------------------
    101  REQUIREMENTS
    102 
    103     This function should provide a fixed point FFT for an input array
    104     of size 256.
    105 
    106 ------------------------------------------------------------------------------
    107  REFERENCES
    108 
    109     [1] Advance Digital Signal Processing, J. Proakis, C. Rader, F. Ling,
    110         C. Nikias, Macmillan Pub. Co.
    111 
    112 ------------------------------------------------------------------------------
    113  PSEUDO-CODE
    114 
    115 
    116    MODIFY( x[] )
    117    RETURN( exponent )
    118 
    119 ------------------------------------------------------------------------------
    120  RESOURCES USED
    121    When the code is written for a specific target processor the
    122      the resources used should be documented below.
    123 
    124  STACK USAGE: [stack count for this module] + [variable to represent
    125           stack usage for each subroutine called]
    126 
    127      where: [stack usage variable] = stack usage for [subroutine
    128          name] (see [filename].ext)
    129 
    130  DATA MEMORY USED: x words
    131 
    132  PROGRAM MEMORY USED: x words
    133 
    134  CLOCK CYCLES: [cycle count equation for this module] + [variable
    135            used to represent cycle count for each subroutine
    136            called]
    137 
    138      where: [cycle count variable] = cycle count for [subroutine
    139         name] (see [filename].ext)
    140 
    141 ------------------------------------------------------------------------------
    142 */
    143 /*----------------------------------------------------------------------------
    144 ; INCLUDES
    145 ----------------------------------------------------------------------------*/
    146 
    147 #include "pv_audio_type_defs.h"
    148 #include "fft_rx4.h"
    149 
    150 #include "fxp_mul32.h"
    151 
    152 /*----------------------------------------------------------------------------
    153 ; MACROS
    154 ; Define module specific macros here
    155 ----------------------------------------------------------------------------*/
    156 
    157 /*----------------------------------------------------------------------------
    158 ; DEFINES
    159 ; Include all pre-processor statements here. Include conditional
    160 ; compile variables also.
    161 ----------------------------------------------------------------------------*/
    162 
    163 /*----------------------------------------------------------------------------
    164 ; LOCAL FUNCTION DEFINITIONS
    165 ; Function Prototype declaration
    166 ----------------------------------------------------------------------------*/
    167 
    168 /*----------------------------------------------------------------------------
    169 ; LOCAL VARIABLE DEFINITIONS
    170 ; Variable declaration - defined here and used outside this module
    171 ----------------------------------------------------------------------------*/
    172 
    173 /*----------------------------------------------------------------------------
    174 ; EXTERNAL FUNCTION REFERENCES
    175 ; Declare functions defined elsewhere and referenced in this module
    176 ----------------------------------------------------------------------------*/
    177 
    178 /*----------------------------------------------------------------------------
    179 ; EXTERNAL VARIABLES REFERENCES
    180 ; Declare variables used in this module but defined elsewhere
    181 ----------------------------------------------------------------------------*/
    182 
    183 /*----------------------------------------------------------------------------
    184 ; EXTERNAL GLOBAL STORE/BUFFER/POINTER REFERENCES
    185 ; Declare variables used in this module but defined elsewhere
    186 ----------------------------------------------------------------------------*/
    187 
    188 /*----------------------------------------------------------------------------
    189 ; FUNCTION CODE
    190 ----------------------------------------------------------------------------*/
    191 
    192 
    193 void fft_rx4_long(
    194     Int32      Data[],
    195     Int32      *peak_value)
    196 
    197 {
    198     Int     n1;
    199     Int     n2;
    200     Int     j;
    201     Int     k;
    202     Int     i;
    203 
    204     Int32   t1;
    205     Int32   t2;
    206     Int32   r1;
    207     Int32   r2;
    208     Int32   r3;
    209     Int32   r4;
    210     Int32   s1;
    211     Int32   s2;
    212     Int32   s3;
    213     Int32   *pData1;
    214     Int32   *pData2;
    215     Int32   *pData3;
    216     Int32   *pData4;
    217     Int32   temp1;
    218     Int32   temp2;
    219     Int32   temp3;
    220     Int32   temp4;
    221     Int32   max;
    222 
    223     Int32   exp_jw1;
    224     Int32   exp_jw2;
    225     Int32   exp_jw3;
    226 
    227 
    228 
    229     const Int32  *pw = W_256rx4;
    230 
    231     n2 = FFT_RX4_LONG;
    232 
    233     for (k = FFT_RX4_LONG; k > 4; k >>= 2)
    234     {
    235 
    236         n1 = n2;
    237         n2 >>= 2;
    238 
    239         for (i = 0; i < FFT_RX4_LONG; i += n1)
    240         {
    241             pData1 = &Data[ i<<1];
    242             pData2 = pData1 + n1;
    243 
    244             temp1   = *pData1;
    245             temp2   = *pData2;
    246 
    247             r1      = temp1 + temp2;
    248             r2      = temp1 - temp2;
    249 
    250             pData3 = pData1 + (n1 >> 1);
    251             pData4 = pData3 + n1;
    252             temp3   = *pData3++;
    253             temp4   = *pData4++;
    254 
    255             t1      = temp3 + temp4;
    256 
    257             *(pData1++) = (r1 + t1);
    258             t2      = temp3 - temp4;
    259             *(pData2++) = (r1 - t1);
    260 
    261             temp1   = *pData1;
    262             temp2   = *pData2;
    263 
    264             s1      = temp1 + temp2;
    265             temp3   = *pData3;
    266             s2      = temp1 - temp2;
    267             temp4   = *pData4;
    268             *pData3--  = (s2 - t2);
    269             *pData4--  = (s2 + t2);
    270 
    271             t1      = temp3 + temp4;
    272 
    273             *pData1    = (s1 + t1);
    274             *pData2    = (s1 - t1);
    275 
    276             r1      = temp3 - temp4;
    277 
    278             *pData4    = (r2 - r1);
    279             *pData3    = (r2 + r1);
    280 
    281         }  /* i */
    282 
    283 
    284 
    285         for (j = 1; j < n2; j++)
    286         {
    287 
    288             exp_jw1 = (*pw++);
    289             exp_jw2 = (*pw++);
    290             exp_jw3 = (*pw++);
    291 
    292 
    293             for (i = j; i < FFT_RX4_LONG; i += n1)
    294             {
    295                 pData1 = &Data[ i<<1];
    296                 pData2 = pData1 + n1;
    297 
    298                 temp1   = *pData1;
    299                 temp2   = *pData2++;
    300 
    301                 r1      = temp1 + temp2;
    302                 r2      = temp1 - temp2;
    303 
    304                 pData3 = pData1 + (n1 >> 1);
    305                 pData4 = pData3 + n1;
    306                 temp3   = *pData3++;
    307                 temp4   = *pData4++;
    308 
    309                 r3      = temp3 + temp4;
    310                 r4      = temp3 - temp4;
    311 
    312                 *(pData1++) = (r1 + r3);
    313                 r1          = (r1 - r3) << 1;
    314 
    315                 temp2   = *pData2;
    316                 temp1   = *pData1;
    317 
    318                 s1      = temp1 + temp2;
    319                 s2      = temp1 - temp2;
    320                 s3      = (s2 + r4) << 1;
    321                 s2      = (s2 - r4) << 1;
    322 
    323                 temp3   = *pData3;
    324                 temp4   = *pData4;
    325 
    326                 t1      = temp3 + temp4;
    327                 t2      = temp3 - temp4;
    328 
    329                 *pData1  = (s1 + t1);
    330                 s1       = (s1 - t1) << 1;
    331 
    332                 *pData2--  = cmplx_mul32_by_16(s1, -r1, exp_jw2);
    333                 r3      = (r2 - t2) << 1;
    334                 *pData2    = cmplx_mul32_by_16(r1,  s1, exp_jw2);
    335 
    336                 r2      = (r2 + t2) << 1;
    337 
    338                 *pData3--  = cmplx_mul32_by_16(s2, -r2, exp_jw1);
    339                 *pData3    = cmplx_mul32_by_16(r2,  s2, exp_jw1);
    340 
    341                 *pData4--  = cmplx_mul32_by_16(s3, -r3, exp_jw3);
    342                 *pData4    = cmplx_mul32_by_16(r3,  s3, exp_jw3);
    343 
    344             }  /* i */
    345 
    346         }  /*  j */
    347 
    348     } /* k */
    349 
    350 
    351     max = 0;
    352 
    353     pData1 = Data - 7;
    354 
    355 
    356     for (i = ONE_FOURTH_FFT_RX4_LONG; i != 0 ; i--)
    357     {
    358         pData1 += 7;
    359         pData2 = pData1 + 4;
    360 
    361 
    362         temp1   = *pData1;
    363         temp2   = *pData2++;
    364 
    365         r1      = temp1 + temp2;
    366         r2      = temp1 - temp2;
    367 
    368         pData3 = pData1 + 2;
    369         pData4 = pData1 + 6;
    370         temp1   = *pData3++;
    371         temp2   = *pData4++;
    372 
    373         t1      = temp1 + temp2;
    374         t2      = temp1 - temp2;
    375 
    376         temp1       = (r1 + t1);
    377         r1          = (r1 - t1);
    378         *(pData1++) = temp1;
    379         max        |= (temp1 >> 31) ^ temp1;
    380 
    381 
    382 
    383         temp2   = *pData2;
    384         temp1   = *pData1;
    385 
    386         s1      = temp1 + temp2;
    387         s2      = temp1 - temp2;
    388 
    389 
    390         temp1   = *pData3;
    391         temp2   = *pData4;
    392 
    393         s3      = (s2 + t2);
    394         s2      = (s2 - t2);
    395 
    396         t1      = temp1 + temp2;
    397         t2      = temp1 - temp2;
    398 
    399         temp1      = (s1 + t1);
    400         *pData1    = temp1;
    401         temp2      = (s1 - t1);
    402 
    403         max       |= (temp1 >> 31) ^ temp1;
    404         *pData2--  = temp2;
    405         max       |= (temp2 >> 31) ^ temp2;
    406 
    407         *pData2    = r1;
    408         max       |= (r1 >> 31) ^ r1;
    409         *pData3--  = s2;
    410         max       |= (s2 >> 31) ^ s2;
    411         *pData4--  = s3;
    412         max       |= (s3 >> 31) ^ s3;
    413 
    414         temp1      = (r2 - t2);
    415         *pData4    = temp1;
    416         temp2      = (r2 + t2);
    417         *pData3    = temp2;
    418         max       |= (temp1 >> 31) ^ temp1;
    419         max       |= (temp2 >> 31) ^ temp2;
    420 
    421     }  /* i */
    422 
    423     *peak_value = max;
    424 
    425     return ;
    426 
    427 }
    428 
    429