Home | History | Annotate | Download | only in cfront
      1 /*---------------------------------------------------------------------------*
      2  *  sp_fft.c  *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 /*
     21 ****************************************************************************
     22 **
     23 **  FILE:         sp_fft.cpp
     24 **
     25 **  CREATED:   11-September-99
     26 **
     27 **  DESCRIPTION:  Split-Radix FFT
     28 **
     29 **
     30 **
     31 **
     32 **  MODIFICATIONS:
     33 ** Revision history log
     34 ** VSS revision history.  Do not edit by hand.
     35 **
     36 ** $NoKeywords: $
     37 **
     38 */
     39 
     40 #ifndef _RTT
     41 #include <stdio.h>
     42 #endif
     43 #include <math.h>
     44 #include <assert.h>
     45 #include "front.h"
     46 #include "portable.h"
     47 
     48 #include "sp_fft.h"
     49 #include "himul32.h"
     50 /*extern "C" asr_int32_t himul32(asr_int32_t factor1, asr_int32_t factor2);*/
     51 
     52 /* >>>> Fixed Point, Floting Point, and Machine Specific Methods <<<<
     53 
     54   We will localize all fixed point, floating point, and machine specific
     55   operations into the following methods so that in the main body of the code
     56   we would not need to worry these issues.
     57 */
     58 
     59 /* convert trigonomy function data to its required representation*/
     60 static trigonomydata
     61 to_trigonomydata(double a)
     62 {
     63   unsigned scale = (unsigned int)(1 << (8 * sizeof(trigonomydata) - 1));
     64   return (trigonomydata)(a * scale);
     65 }
     66 
     67 /* Do a sign-extending right shift of x by i bits, and
     68  * round the result based on the leftmost bit shifted out.
     69  * Must have 1 <= i < 32.
     70  * Note that C doesn't define whether right shift of signed
     71  * ints sign-extends or zero-fills.
     72  * On platforms that do sign-extend, use the native right shift.
     73  * Else use a short branch-free sequence that forces in copies
     74  * of the sign bit.
     75  */
     76 static PINLINE asr_int32_t rshift(asr_int32_t x, int i)
     77 {
     78   asr_int32_t xshift = x >> i;
     79   ASSERT(i >= 1 && i < 32);
     80 
     81 #if -1 >> 31 != -1
     82   asr_int32_t signbit = (asr_int32_t)(((asr_uint32_t)x & 0x80000000U) >> i);
     83   xshift |= -signbit;
     84 #endif
     85 
     86   xshift += (x >> (i - 1)) & 1;
     87   return xshift;
     88 }
     89 
     90 
     91 /*  compute (a + jb)*(c + jd) = a*c - b*d + j(ad + bc) */
     92 static PINLINE void complex_multiplier(trigonomydata a, trigonomydata b,
     93                                        fftdata c,   fftdata d,
     94                                        fftdata* real,  fftdata* imag)
     95 {
     96   /*
     97       himul32(factor1, factor2) = floor( (factor1 * factor2) / 2**32 )
     98       we need floor( (factor1 * factor2) / 2**31 )
     99       retain one more bit of accuracy by left shifting first.
    100   */
    101   c <<= 1;
    102   d <<= 1;
    103   *real = himul32(a, c) - himul32(b, d);
    104   *imag = himul32(a, d) + himul32(b, c);
    105 }
    106 
    107 /* determine the maximum number of bits required to represent the data */
    108 static PINLINE int data_bits(const int length, fftdata data[])
    109 {
    110   asr_uint32_t  bits = 0;
    111   int     d    = 0;
    112   int     i;
    113 
    114   ASSERT(sizeof(data[0]) == 4);
    115 
    116   for (i = 0; i < length; i++)
    117   {
    118     d = data[i];
    119     bits |= (d > 0) ? d : -d;
    120   }
    121 
    122   d = 0;
    123   while (bits > 0)
    124   {
    125     bits >>= 1;
    126     d++;
    127   }
    128 
    129   return d;
    130 }
    131 
    132 
    133 /* >>>> Fixed Point, Floting Point, and Machine Independent Methods <<<< */
    134 
    135 /* constructor */
    136 srfft* new_srfft(unsigned logLength)
    137 {
    138   srfft* pthis;
    139 
    140   /* cannot do smaller than 4 point FFT */
    141   ASSERT(logLength >= 2);
    142 
    143   pthis              = (srfft*) CALLOC(1, sizeof(srfft), "srfft");
    144   pthis->m_logLength = logLength;
    145   pthis->m_length    = 1 << logLength;
    146 
    147   allocate_bitreverse_tbl(pthis);
    148   allocate_butterfly_tbl(pthis);
    149   allocate_trigonomy_tbl(pthis);
    150 
    151   return pthis;
    152 }
    153 
    154 /* destructor */
    155 void delete_srfft(srfft* pSrfft)
    156 {
    157   FREE((char*)pSrfft->m_sin3Tbl);
    158   FREE((char*)pSrfft->m_cos3Tbl);
    159   FREE((char*)pSrfft->m_sin2Tbl);
    160   FREE((char*)pSrfft->m_cos2Tbl);
    161   FREE((char*)pSrfft->m_sin1Tbl);
    162   FREE((char*)pSrfft->m_cos1Tbl);
    163   FREE((char*)pSrfft->m_butterflyIndexTbl);
    164   FREE((char*)pSrfft->m_bitreverseTbl);
    165   FREE((char*)pSrfft);
    166 }
    167 
    168 /*
    169     allocate L shaped butterfly index lookup table
    170 */
    171 void allocate_butterfly_tbl(srfft* pthis)
    172 {
    173   unsigned butterflyLength, butterflies, *butterflyIndex;
    174   unsigned m, n, n2, i, j, i0, is, id, ii, ib;
    175 
    176 
    177   /* compute total number of L shaped butterflies */
    178   m = pthis->m_logLength;
    179   butterflyLength = 0;
    180   butterflies  = 0;
    181   for (i = 0; i < m; i++)
    182   {
    183     butterflies = (i % 2) ? butterflyLength : butterflyLength + 1;
    184     butterflyLength += butterflies;
    185   }
    186 
    187   /*  Allocate m more spaces to store size information */
    188   butterflyLength += m;
    189   butterflyIndex = (unsigned*) CALLOC(butterflyLength, sizeof(unsigned), "srfft.butterflyIndex");
    190 
    191   /* Compute and store L shaped butterfly indexes at each stage */
    192   n = pthis->m_length;
    193   n2  = n << 1;
    194   butterflyLength = 0;
    195   ib = 0;
    196   for (i = 0; i < m; i++)
    197   {
    198     butterflies = (i % 2) ? butterflyLength : butterflyLength + 1;
    199     butterflyLength += butterflies;
    200 
    201     /* Store number of L butterflies at stage m-i*/
    202     butterflyIndex[ib++] = butterflies;
    203 
    204     /* Compute Sorensen, Heideman, and Burrus indexes for L shaped butterfiles */
    205     id = n2;
    206     is = 0;
    207     n2 = n2 >> 1;
    208     while (is < n)
    209     {
    210       for (i0 = is; i0 < n; i0 += id)
    211       {
    212         butterflyIndex[ib] = i0;
    213         if (i0 != 0)
    214         {
    215           /* sort bufferfly index in increasing order to simplify look up */
    216           ii = ib;
    217           while (butterflyIndex[ii] < butterflyIndex[ii-1])
    218           {
    219             j = butterflyIndex[ii];
    220             butterflyIndex[ii] = butterflyIndex[ii-1];
    221             butterflyIndex[--ii] = j;
    222           }
    223         }
    224         ib++;
    225       }
    226       is = 2 * id - n2;
    227       id = id << 2;
    228     }
    229   }
    230   pthis->m_butterflyIndexTbl = butterflyIndex;
    231 
    232   /* move to stage 2 buffer index table */
    233   for (i = 0; i < m - 2; i++)
    234   {
    235     butterflies = *butterflyIndex;
    236     butterflyIndex += (butterflies + 1);
    237   }
    238 
    239   /*
    240       Since we want to compute four point butterflies directly,
    241       when we compute two point butterflieswe at the last stage
    242       we must bypass those two point butterflies that are decomposed
    243       from previous stage's four point butterflies .
    244   */
    245   butterflies = *butterflyIndex++; /* number of four point butterflies */
    246   ii = butterflies + 1;   /* index to the two point butterflies*/
    247   for (i = 0; i < butterflies; i++)
    248   {
    249     j = butterflyIndex[i];
    250 
    251     /*
    252         find those two point butterflies that are
    253         decomposed from the four point butterflies
    254     */
    255     while (butterflyIndex[ii] != j) /* look up is sure so do not need worry over bound*/
    256     {
    257       ii++;
    258     }
    259     butterflyIndex[ii++] = 0;
    260 
    261     ASSERT(ii <= butterflyLength + m);
    262   }
    263 }
    264 
    265 
    266 /*
    267     Allocate trigonoy function lookup tables
    268 */
    269 void allocate_trigonomy_tbl(srfft* pthis)
    270 {
    271   trigonomydata *pcos1, *psin1, *pcos2, *psin2, *pcos3, *psin3;
    272   unsigned  m, n, n2, n4, i, j, ii, nt;
    273   double   e, radias, radias3;
    274 
    275   m  = pthis->m_logLength;
    276   n  = pthis->m_length;
    277   nt = (n >> 1) - 1;
    278   pcos1 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
    279   psin1 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
    280   pcos2 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
    281   psin2 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
    282   pcos3 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
    283   psin3 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
    284 
    285   ii = 0;
    286   n2 = n << 1;
    287   for (i = 0; i < m - 1; i++)
    288   {
    289     n2 = n2 >> 1;
    290     n4 = n2 >> 2;
    291     e = 6.283185307179586 / n2;
    292 
    293     for (j = 0; j < n4; j++)
    294     {
    295       if (j != 0) /* there is no need for radias zero trigonomy tables */
    296       {
    297         radias  = j * e;
    298         radias3 = 3.0 * radias;
    299 
    300         pcos1[ii]   = to_trigonomydata(cos(radias));
    301         psin1[ii]   = to_trigonomydata(sin(radias));
    302         pcos3[ii]   = to_trigonomydata(cos(radias3));
    303         psin3[ii]   = to_trigonomydata(sin(radias3));
    304       }
    305       ii++;
    306     }
    307   }
    308 
    309   for (i = 0; i < nt; i++)
    310   {
    311     radias = 3.141592653589793 * (i + 1) / n;
    312 
    313     pcos2[i]  = to_trigonomydata(cos(radias));
    314     psin2[i]  = to_trigonomydata(sin(radias));
    315   }
    316 
    317   pthis->m_cos1Tbl = pcos1;
    318   pthis->m_sin1Tbl = psin1;
    319   pthis->m_cos2Tbl = pcos2;
    320   pthis->m_sin2Tbl = psin2;
    321   pthis->m_cos3Tbl = pcos3;
    322   pthis->m_sin3Tbl = psin3;
    323 
    324 }
    325 
    326 /*
    327     Allocate bit reverse tables
    328 */
    329 void allocate_bitreverse_tbl(srfft* pthis)
    330 {
    331   unsigned forward, reverse, *tbl;
    332   unsigned m, n, i, j;
    333 
    334   n  = pthis->m_length;
    335   tbl = (unsigned*) CALLOC(n, sizeof(unsigned), "srfft.bitreverseTbl");
    336   for (j = 0; j < n; j++) tbl[j] = 0;
    337 
    338   m  = pthis->m_logLength;
    339   forward = 1;
    340   reverse = n >> 1;
    341   for (i = 0; i < m; i++)
    342   {
    343     for (j = 0; j < n; j++)
    344     {
    345       if (forward & j) tbl[j] |= reverse;
    346     }
    347     reverse >>= 1;
    348     forward <<= 1;
    349   }
    350 
    351   pthis->m_bitreverseTbl = tbl;
    352 }
    353 
    354 
    355 /*
    356     Compute a four point FFT that requires no multiplications
    357 */
    358 static PINLINE void four_point_fft1(fftdata* data)
    359 {
    360   fftdata r0, r1, r2;
    361 
    362   r0  = data[0];
    363   r1  = data[4];
    364   data[0] = r0 + r1;
    365   data[4] = r0 - r1;
    366 
    367   r0  = data[2];
    368   r1  = data[6];
    369   data[2] = r0 + r1;
    370   data[6] = r0 - r1;
    371 
    372   r0  = data[1];
    373   r1  = data[5];
    374   data[1] = r0 + r1;
    375   data[5] = r0 - r1;
    376 
    377   r0  = data[3];
    378   r1  = data[7];
    379   data[3] = r0 + r1;
    380   data[7] = r0 - r1;
    381 
    382   r0 = data[0];
    383   r1 = data[2];
    384   data[0] = r0 + r1;
    385   data[2] = r0 - r1;
    386 
    387   r0 = data[1];
    388   r1 = data[3];
    389   data[1] = r0 + r1;
    390   data[3] = r0 - r1;
    391 
    392   r0 = data[4];
    393   r1 = data[7];
    394   r2 = data[6];
    395   data[4] = r0 + r1;
    396   data[6] = r0 - r1;
    397 
    398   r0 = data[5];
    399   data[5] = r0 - r2;
    400   data[7] = r0 + r2;
    401 }
    402 
    403 /*
    404     Compute a two point FFT that requires no multiplications
    405 */
    406 static PINLINE void two_point_fft1(fftdata* data)
    407 {
    408   fftdata r0, r1;
    409 
    410   r0 = data[0];
    411   r1 = data[2];
    412   data[0] = r0 + r1;
    413   data[2] = r0 - r1;
    414 
    415   r0 = data[1];
    416   r1 = data[3];
    417   data[1] = r0 + r1;
    418   data[3] = r0 - r1;
    419 }
    420 
    421 
    422 static PINLINE void comp_L_butterfly1(unsigned butteflyIndex, unsigned quarterLength,
    423                                       trigonomydata  cc1,  trigonomydata ss1,
    424                                       trigonomydata    cc3,  trigonomydata ss3,
    425                                       fftdata* data)
    426 {
    427   unsigned k1, k2, k3;
    428   fftdata  r0, r1, r2, r3, i0, i1, i2, i3;
    429 
    430   quarterLength <<= 1;
    431 
    432   k1 = quarterLength;
    433   k2 = k1 + quarterLength;
    434   k3 = k2 + quarterLength;
    435 
    436   r0 = data[0];
    437   r1 = data[k1];
    438   r2 = data[k2];
    439   r3 = data[k3];
    440   i0 = data[1];
    441   i1 = data[k1+1];
    442   i2 = data[k2+1];
    443   i3 = data[k3+1];
    444 
    445   /* compute the radix-2 butterfly */
    446   data[0]    = r0 + r2;
    447   data[k1]   = r1 + r3;
    448   data[1]    = i0 + i2;
    449   data[k1+1] = i1 + i3;
    450 
    451   /* compute two radix-4 butterflies with twiddle factors */
    452   r0 -= r2;
    453   r1 -= r3;
    454   i0 -= i2;
    455   i1 -= i3;
    456 
    457   r2 = r0 + i1;
    458   i2 = r1 - i0;
    459   r3 = r0 - i1;
    460   i3 = r1 + i0;
    461 
    462   /*
    463       optimize the butterfly computation for zero's power twiddle factor
    464       that does not need multimplications
    465   */
    466   if (butteflyIndex == 0)
    467   {
    468     data[k2] = r2;
    469     data[k2+1] = -i2;
    470     data[k3] = r3;
    471     data[k3+1] = i3;
    472   }
    473   else
    474   {
    475     complex_multiplier(cc1, -ss1, r2, -i2, data + k2, data + k2 + 1);
    476     complex_multiplier(cc3, -ss3, r3, i3,  data + k3, data + k3 + 1);
    477   }
    478 }
    479 
    480 /**********************************************************************/
    481 void do_fft1(srfft* pthis, unsigned length2, fftdata* data)
    482 {
    483   unsigned  *indexTbl, indexLength;
    484   trigonomydata *cos1, *sin1, *cos3, *sin3;
    485   trigonomydata cc1, ss1, cc3, ss3;
    486   unsigned  n, m, n4, i, j, k, ii, k0;
    487   fftdata   temp;
    488 
    489   /* Load butterfly index table */
    490   indexTbl = pthis->m_butterflyIndexTbl;
    491   indexLength = 0;
    492 
    493   /* Load cosine and sine tables */
    494   cos1 = pthis->m_cos1Tbl;
    495   sin1 = pthis->m_sin1Tbl;
    496   cos3 = pthis->m_cos3Tbl;
    497   sin3 = pthis->m_sin3Tbl;
    498 
    499   /* stages of butterfly computation*/
    500   n = pthis->m_length;
    501   m = pthis->m_logLength;
    502 
    503 
    504   /*
    505       compute L shaped butterfies util only 4 and 2 point
    506       butterfiles are left
    507   */
    508   n4 = n >> 1;
    509   ii = 0;
    510   for (i = 0; i < m - 2; i++)
    511   {
    512     n4 >>= 1;
    513 
    514     /* read the number of L shaped butterfly nodes at the stage */
    515     indexLength = *indexTbl++;
    516 
    517     /*
    518         compute one L shaped butterflies at each stage
    519         j (time) and k (frequency) loops are reversed to minimize
    520         trigonomy table lookups
    521     */
    522     for (j = 0; j < n4; j++)
    523     {
    524       cc1 = cos1[ii];
    525       ss1 = sin1[ii];
    526       cc3 = cos3[ii];
    527       ss3 = sin3[ii++];
    528       for (k = 0; k < indexLength; k++)
    529       {
    530         k0 = indexTbl[k] + j;
    531         k0 <<= 1;
    532         comp_L_butterfly1(j, n4, cc1, ss1, cc3, ss3, data + k0);
    533       }
    534     }
    535 
    536     /* Move to the butterfly index table of the next stage*/
    537     indexTbl += indexLength;
    538   }
    539 
    540   /* Compute 4 point butterflies at stage 2 */
    541   indexLength = *indexTbl++;
    542   for (k = 0; k < indexLength; k++)
    543   {
    544     k0 = indexTbl[k];
    545     k0 <<= 1;
    546     four_point_fft1(data + k0);
    547   }
    548   indexTbl += indexLength;
    549 
    550   /* Compute 2 point butterflies of the last stage */
    551   indexLength = *indexTbl++;
    552   for (k = 0; k < indexLength; k++)
    553   {
    554     k0 = indexTbl[k];
    555     k0 <<= 1;
    556 
    557     /* k0 = 0 implies these nodes have been computed */
    558     if (k0 != 0)
    559     {
    560       two_point_fft1(data + k0);
    561     }
    562   }
    563 
    564   /* Bit reverse the data array */
    565   indexTbl = pthis->m_bitreverseTbl;
    566   for (i = 0; i < n; i++)
    567   {
    568     ii = indexTbl[i];
    569     if (i < ii)
    570     {
    571       j = i << 1;
    572       k = ii << 1;
    573       temp = data[j];
    574       data[j] = data[k];
    575       data[k] = temp;
    576 
    577       j++;
    578       k++;
    579       temp = data[j];
    580       data[j] = data[k];
    581       data[k] = temp;
    582     }
    583   }
    584 }
    585 
    586 void do_real_fft(srfft* pthis, unsigned n, fftdata* data)
    587 {
    588   unsigned  n2;
    589   unsigned  i, i1, i2, i3, i4;
    590   fftdata   h1r, h1i, h2r, h2i, tr, ti;
    591   trigonomydata *cos2, *sin2;
    592 
    593   cos2  = pthis->m_cos2Tbl;
    594   sin2  = pthis->m_sin2Tbl;
    595 
    596   /* do a complex FFT of half size using the even indexed data
    597   ** as real component and odd indexed data as imaginary data components
    598   */
    599 
    600   do_fft1(pthis, n, data);
    601 
    602   /*
    603   **  queeze the real valued first and last component of
    604   **  the complex transform as elements data[0] and data[1]
    605   */
    606   tr = data[0];
    607   ti = data[1];
    608   data[0] = (tr + ti);
    609   data[1] = (tr - ti);
    610 
    611   /* do the rest of elements*/
    612   n2  = n >> 2;
    613   for (i = 1; i < n2; i++)
    614   {
    615     i1 = i << 1;
    616     i2 = i1 + 1;
    617     i3 = n - i1;
    618     i4 = i3 + 1;
    619 
    620     h1r = (data[i1] + data[i3]) / 2;
    621     h1i = (data[i2] - data[i4]) / 2;
    622     h2r = (data[i2] + data[i4]) / 2;
    623     h2i = -(data[i1] - data[i3]) / 2;
    624 
    625     complex_multiplier(cos2[i-1], -sin2[i-1], h2r, h2i, &tr, &ti);
    626 
    627     data[i1] = h1r + tr;
    628     data[i2] = h1i + ti;
    629     data[i3] = h1r - tr;
    630     data[i4] = -h1i + ti;
    631   }
    632   /* center one needs no multiplication, but has to reverse sign */
    633   i = (n >> 1);
    634   data[i+1] = -data[i+1];
    635 
    636 }
    637 
    638 /*****************************************************************************/
    639 
    640 int do_real_fft_magsq(srfft* pthis, unsigned n, fftdata* data)
    641 {
    642   fftdata tr, ti, last;
    643   unsigned i, ii, n1;
    644   int  scale    = 0;
    645   int  s        = 0;
    646   unsigned maxval   = 0;
    647 
    648 
    649   /*
    650   **   Windowed fftdata has an unknown data length - determine this using
    651   **   data_bits(), a maximum of:
    652   **
    653   **   fixed data = windowedData * 2**HAMMING_DATA_BITS
    654   **
    655   **   FFT will grow data log2Length. In order to avoid data overflow,
    656   **   we can scale data by a factor
    657   **
    658   **   scale = 8*sizeof(fftdata) - data_bits() - log2Length
    659   **
    660   **   In other words, we now have
    661   **
    662   **   fixed data = windowedData * 2**HAMMING_DATA_BITS * 2**scale
    663   **
    664   */
    665 
    666 
    667   scale = 8 * sizeof(fftdata) - 2 - pthis->m_logLength;
    668   scale -= data_bits(n, data);
    669 
    670   for (i = 0; i < n; i++)
    671   {
    672     if (scale < 0)
    673     {
    674       data[i] = rshift(data[i], -scale);
    675     }
    676     else
    677     {
    678       data[i] <<= scale;
    679     }
    680   }
    681 
    682   /* compute the real input fft,  the real valued first and last component of
    683   ** the complex transform is stored as elements data[0] and data[1]
    684   */
    685 
    686   do_real_fft(pthis, n, data);
    687 
    688   /*  After fft, we now have the data,
    689   **
    690   **  fixed data = fftdata * 2**HAMMING_DATA_BITS * 2**scale
    691   **
    692   **  to get fft data, we then need to reverse-shift the fixed data by the
    693   **  scale constant;
    694   **
    695   **  However, since our purpose is to compute magnitude, we can combine
    696   **  this step into the magnitude computation. Notice that
    697   **
    698   **  fixed data = fftdata * 2**(8*sizeof(fftdata) - DATA_BITS - log2Length)
    699   **
    700   **  if we use himul32 to compute the magnitude, which gives us,
    701   **
    702   **  fixed magnitude = fftdata magnitude * 2**(2*(32 - 16 - log2Length)) - 2**32)
    703   **                  = fftdata magnitude * 2**(-2*log2Length)
    704   **
    705   **  to get the fixed magnitude = fftdata magnitude * 2**(-log2Length-1)
    706   **                             = fftdata magnitude/FFT length
    707   **  we need to upscale fftdata to cancel out the log2Lenght-1 factor in
    708   **  the fixed magnitude
    709   **
    710   **  Notice that upshift scale log2Lenght-1 is not a constant, but a
    711   **  function of FFT length.
    712   **  Funthermore, even and odd log2Length-1 must be handled differently.
    713   **
    714   */
    715 
    716   /*
    717   **  This bit is a lot simpler now, we just aim to get the pre-magsqu
    718   **  values in a 30-bit range + sign.
    719   **  This is the max val if we want r*r+i*i guarenteed < signed int64 range.
    720   **  So shift the data up until it is ==30 bits (FFTDATA_SIZE-2)
    721   */
    722 
    723   s = (FFTDATA_SIZE - 2) - data_bits(n, data);
    724   /* n is twice the size, so this */
    725 
    726 
    727   for (i = 0; i < n; i++)
    728   {
    729     if (s < 0)
    730     {
    731       data[i] = rshift(data[i], -s);
    732     }
    733     else
    734     {
    735       data[i] <<= s;
    736     }
    737   }
    738 
    739   scale += s;
    740 
    741   /*
    742   **  OK, now we are in the 30bit range, we can do a magsq.
    743   **  This magsq output must be less that 60bit plus sign.
    744   */
    745 
    746   /*
    747   **  Special at start as DC and Nyquist freqs are in data[0] and data[1]
    748   **  respectively.
    749   */
    750 
    751   tr = data[0];
    752   data[0] = himul32(tr, tr);
    753   maxval |= data[0];
    754 
    755   tr = data[1];
    756   last = himul32(tr, tr);
    757   maxval |= last;
    758 
    759   n1 = n >> 1;
    760   for (i = 1; i < n1; i++)
    761   {
    762     ii = i << 1;
    763     tr = data[ii];
    764     data[i] = himul32(tr, tr);
    765 
    766     ti = data[++ii];
    767     data[i] += himul32(ti, ti);
    768 
    769     maxval |= data[i];
    770   }
    771 
    772   data[n1] = last; /* now the Nyquist freq can be put in place */
    773 
    774   /*
    775   **  computing magnitude _squared_ means the scale is effectively
    776   **  applied twice
    777   */
    778 
    779   scale *= 2;
    780 
    781   /*
    782   **  account for inherent scale of fft - we have do to this here as each
    783   **  stage scales by sqrt(2), and we couldn't add this to scale until
    784   **  after it had been multiplied by two (??)
    785   **  TODO: The truth is we got here by trial and error
    786   **   This should be checked.
    787   */
    788 
    789   scale += pthis->m_logLength + 1;
    790 
    791   /*
    792   **  doing the himul32() shift results in shifting down by 32(FFTDATA_SIZE) bits.
    793   */
    794 
    795   scale -= 32;
    796 
    797   ASSERT(maxval >= 0);
    798   ASSERT(!(maxval & 0xC0000000));
    799   /* we've done something wrong if */
    800   /* either of the top two bits  */
    801   /* get used!    */
    802 
    803   return(-scale);  /* return the amount we have shifted the */
    804   /* data _down_ by    */
    805 
    806 }
    807 
    808 
    809 /*****************************************************************************/
    810 
    811 void configure_fft(fft_info *fft, int size)
    812 {
    813   unsigned int log2Length, length;
    814 
    815   log2Length = 0;
    816   length = size / 2;
    817   while (length > 1)
    818   {
    819     length = length >> 1;
    820     log2Length++;
    821   }
    822 
    823   ASSERT(size == 1 << (log2Length + 1));
    824   fft->size2 = size;
    825   fft->size = size / 2;
    826 
    827   fft->m_srfft = new_srfft(log2Length);
    828   fft->real = (fftdata*) CALLOC(size + 2, sizeof(fftdata), "srfft.fft_data");
    829   fft->imag = fft->real + size / 2 + 1;
    830 }
    831 
    832 int fft_perform_and_magsq(fft_info *fft)
    833 {
    834   unsigned n = fft->size2;
    835   fftdata     *real = fft->real;
    836   srfft       *pSrfft = fft->m_srfft;
    837   ;
    838 
    839   return do_real_fft_magsq(pSrfft, n, real);
    840 }
    841 
    842 void unconfigure_fft(fft_info *fft)
    843 {
    844   ASSERT(fft);
    845   delete_srfft(fft->m_srfft);
    846   FREE((char*)fft->real);
    847 }
    848 
    849 
    850 int place_sample_data(fft_info *fft, fftdata *seq, fftdata *smooth, int num)
    851 {
    852   int ii, size2;
    853   srfft * pSrfft;
    854 
    855   pSrfft = fft->m_srfft;
    856 
    857   ASSERT(fft);
    858   ASSERT(seq);
    859   ASSERT(smooth);
    860   ASSERT(num <= (int)fft->size2);
    861   size2 = fft->size2;
    862 
    863   for (ii = 0; ii < num; ii++)
    864   {
    865     fft->real[ii] = (fftdata)(seq[ii] * smooth[ii]);
    866   }
    867 
    868   for (; ii < size2; ii++)
    869   {
    870     fft->real[ii] = 0;
    871   }
    872 
    873   return(-(HALF_FFTDATA_SIZE - 1));
    874 }
    875 
    876