Home | History | Annotate | Download | only in lib
      1 /*
      2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 /**
     17  * @file picofftsg.c
     18  *
     19  * FFT/DCT related data types, constants and functions in Pico
     20  *
     21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
     22  * All rights reserved.
     23  *
     24  * History:
     25  * - 2009-04-20 -- initial version
     26  *
     27 */
     28 
     29 #include "picoos.h"
     30 #include "picofftsg.h"
     31 #include "picodbg.h"
     32 
     33 #ifdef __cplusplus
     34 extern "C" {
     35 #endif
     36 
     37 /**
     38  * @addtogroup picofft
     39  * ---------------------------------------------------\n
     40  * <b> Fast Fourier/Cosine/Sine Transform </b>\n
     41  * Adapted from http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html (Copyright Takuya OOURA, 1996-2001)\n
     42  * ---------------------------------------------------\n
     43 
     44   Overall features
     45   - dimension   :one
     46   - data length :power of 2
     47   - decimation  :frequency
     48   - radix       :split-radix
     49   - data        :inplace
     50   - table       :not use
     51 
     52   functions
     53   - cdft: Complex Discrete Fourier Transform
     54   - rdft: Real Discrete Fourier Transform
     55   - ddct: Discrete Cosine Transform
     56   - ddst: Discrete Sine Transform
     57   - dfct: Cosine Transform of RDFT (Real Symmetric DFT)
     58   - dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
     59 
     60   function prototypes
     61   - void cdft(picoos_int32, picoos_int32, PICOFFTSG_FFTTYPE *);
     62   - void rdft(picoos_int32, picoos_int32, PICOFFTSG_FFTTYPE *);
     63   - void ddct(picoos_int32, picoos_int32, PICOFFTSG_FFTTYPE *);
     64   - void ddst(picoos_int32, picoos_int32, PICOFFTSG_FFTTYPE *);
     65   - void dfct(picoos_int32, PICOFFTSG_FFTTYPE *);
     66   - void dfst(picoos_int32, PICOFFTSG_FFTTYPE *);
     67 
     68   <b>Complex DFT (Discrete Fourier Transform)</b>
     69 
     70   [definition]
     71   - <case1>
     72       - X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
     73   - <case2>
     74       - X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
     75   - (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
     76 
     77   [usage]
     78   - <case1>
     79       - cdft(2*n, 1, a);
     80   - <case2>
     81       - cdft(2*n, -1, a);
     82 
     83   [parameters]
     84   - 2*n            :data length (picoos_int32)
     85   - n >= 1, n = power of 2
     86   - a[0...2*n-1]   :input/output data (PICOFFTSG_FFTTYPE *)
     87   - input data
     88       - a[2*j] = Re(x[j]),
     89       - a[2*j+1] = Im(x[j]), 0<=j<n
     90   - output data
     91       - a[2*k] = Re(X[k]),
     92       - a[2*k+1] = Im(X[k]), 0<=k<n
     93 
     94   [remark]
     95   - Inverse of cdft(2*n, -1, a); is
     96       -cdft(2*n, 1, a);
     97           - for (j = 0; j <= 2 * n - 1; j++) {
     98               - a[j] *= 1.0 / n;
     99           - }
    100 
    101 
    102   <b> Real DFT / Inverse of Real DFT </b>
    103 
    104   [definition]
    105   - <case1> RDFT
    106       - R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
    107       - I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
    108   -    <case2> IRDFT (excluding scale)
    109       - a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
    110       - sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
    111       - sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
    112 
    113   [usage]
    114   - <case1>
    115       - rdft(n, 1, a);
    116   -    <case2>
    117       - rdft(n, -1, a);
    118 
    119   [parameters]
    120   - n              :data length (picoos_int32)
    121   - n >= 2, n = power of 2
    122   - a[0...n-1]     :input/output data (PICOFFTSG_FFTTYPE *)
    123   - <case1>
    124       - output data
    125           - a[2*k] = R[k], 0<=k<n/2
    126           - a[2*k+1] = I[k], 0<k<n/2
    127           - a[1] = R[n/2]
    128   - <case2>
    129       - input data
    130           - a[2*j] = R[j], 0<=j<n/2
    131           - a[2*j+1] = I[j], 0<j<n/2
    132           - a[1] = R[n/2]
    133 
    134   [remark]
    135   - Inverse of rdft(n, 1, a); is
    136   - rdft(n, -1, a);
    137       - for (j = 0; j <= n - 1; j++) {
    138           - a[j] *= 2.0 / n;
    139      -}
    140 
    141 
    142   <b> DCT (Discrete Cosine Transform) / Inverse of DCT</b>
    143 
    144   [definition]
    145   - <case1> IDCT (excluding scale)
    146       - C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
    147   - <case2> DCT
    148       - C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
    149 
    150   [usage]
    151   - <case1>
    152     - ddct(n, 1, a);
    153   - <case2>
    154     - ddct(n, -1, a);
    155 
    156   [parameters]
    157   - n              :data length (picoos_int32)
    158   - n >= 2, n = power of 2
    159   - a[0...n-1]     :input/output data (PICOFFTSG_FFTTYPE *)
    160   - output data
    161       - a[k] = C[k], 0<=k<n
    162 
    163   [remark]
    164   - Inverse of ddct(n, -1, a); is
    165   - a[0] *= 0.5;
    166   - ddct(n, 1, a);
    167   - for (j = 0; j <= n - 1; j++) {
    168       - a[j] *= 2.0 / n;
    169   - }
    170 
    171   <b> DST (Discrete Sine Transform) / Inverse of DST</b>
    172 
    173   [definition]
    174   - <case1> IDST (excluding scale)
    175       - S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
    176   - <case2> DST
    177       - S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
    178 
    179   [usage]
    180   - <case1>
    181       - ddst(n, 1, a);
    182   - <case2>
    183       - ddst(n, -1, a);
    184 
    185   [parameters]
    186   - n              :data length (picoos_int32)
    187   - n >= 2, n = power of 2
    188   - a[0...n-1]     :input/output data (PICOFFTSG_FFTTYPE *)
    189   - <case1>
    190       - input data
    191           - a[j] = A[j], 0<j<n
    192           - a[0] = A[n]
    193       - output data
    194           - a[k] = S[k], 0<=k<n
    195   - <case2>
    196     - output data
    197         - a[k] = S[k], 0<k<n
    198         - a[0] = S[n]
    199 
    200   [remark]
    201   - Inverse of ddst(n, -1, a); is
    202   - a[0] *= 0.5;
    203   - ddst(n, 1, a);
    204   - for (j = 0; j <= n - 1; j++) {
    205       - a[j] *= 2.0 / n;
    206   - }
    207 
    208   <b> Cosine Transform of RDFT (Real Symmetric DFT)</b>
    209 
    210   [definition]
    211   - C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
    212 
    213   [usage]
    214   - dfct(n, a);
    215 
    216   [parameters]
    217   - n              :data length - 1 (picoos_int32)
    218   - n >= 2, n = power of 2
    219   - a[0...n]       :input/output data (PICOFFTSG_FFTTYPE *)
    220 
    221   - output data
    222     - a[k] = C[k], 0<=k<=n
    223 
    224   [remark]
    225   - Inverse of a[0] *= 0.5; a[n] *= 0.5; dfct(n, a); is
    226   - a[0] *= 0.5;
    227   - a[n] *= 0.5;
    228   - dfct(n, a);
    229   - for (j = 0; j <= n; j++) {
    230       - a[j] *= 2.0 / n;
    231   - }
    232 
    233   <b> Sine Transform of RDFT (Real Anti-symmetric DFT)</b>
    234 
    235   [definition]
    236   - S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
    237 
    238   [usage]
    239   - dfst(n, a);
    240 
    241   [parameters]
    242   - n              :data length + 1 (picoos_int32)
    243   - n >= 2, n = power of 2
    244   - a[0...n-1]     :input/output data (PICOFFTSG_FFTTYPE *)
    245   - output data
    246       - a[k] = S[k], 0<k<n
    247       - (a[0] is used for work area)
    248 
    249   [remark]
    250   - Inverse of dfst(n, a); is
    251   - dfst(n, a);
    252   - for (j = 1; j <= n - 1; j++) {
    253     - a[j] *= 2.0 / n;
    254   - }
    255 
    256 */
    257 
    258 /* fixed point multiplier for weights */
    259 #define PICODSP_WGT_SHIFT  (0x20000000)  /* 2^29 */
    260 #define PICOFFTSG_WGT_SHIFT2 (0x10000000)  /* PICODSP_WGT_SHIFT/2 */
    261 #define PICOFFTSG_WGT_N_SHIFT (29)  /* 2^29 */
    262 /* fixed point known constants */
    263 #ifndef WR5000  /* cos(M_PI_2*0.5000) */
    264 #define WR5000      (PICOFFTSG_FFTTYPE)(0.707106781186547524400844362104849039284835937688*PICODSP_WGT_SHIFT)
    265 #ifndef WR2500  /* cos(M_PI_2*0.2500) */
    266 #define WR2500      (PICOFFTSG_FFTTYPE)(0.923879532511286756128183189396788286822416625863*PICODSP_WGT_SHIFT)
    267 #endif
    268 #ifndef WI2500  /* sin(M_PI_2*0.2500) */
    269 #define WI2500      (PICOFFTSG_FFTTYPE)(0.382683432365089771728459984030398866761344562485*PICODSP_WGT_SHIFT)
    270 #endif
    271 #ifndef WR1250  /* cos(M_PI_2*0.1250) */
    272 #define WR1250      (PICOFFTSG_FFTTYPE)(0.980785280403230449126182236134239036973933730893*PICODSP_WGT_SHIFT)
    273 #endif
    274 #ifndef WI1250  /* sin(M_PI_2*0.1250) */
    275 #define WI1250      (PICOFFTSG_FFTTYPE)(0.195090322016128267848284868477022240927691617751*PICODSP_WGT_SHIFT)
    276 #endif
    277 #ifndef WR3750  /* cos(M_PI_2*0.3750) */
    278 #define WR3750      (PICOFFTSG_FFTTYPE)(0.831469612302545237078788377617905756738560811987*PICODSP_WGT_SHIFT)
    279 #endif
    280 #ifndef WI3750  /* sin(M_PI_2*0.3750) */
    281 #define WI3750      (PICOFFTSG_FFTTYPE)(0.555570233019602224742830813948532874374937190754*PICODSP_WGT_SHIFT)
    282 #endif
    283 
    284 #else
    285 
    286  /*#ifndef M_PI_2
    287    #define M_PI_2      (PICOFFTSG_FFTTYPE)1.570796326794896619231321691639751442098584699687
    288    #endif*/
    289 #ifndef WR5000  /* cos(M_PI_2*0.5000) */
    290 #define WR5000      (PICOFFTSG_FFTTYPE)0.707106781186547524400844362104849039284835937688
    291 #endif
    292 #ifndef WR2500  /* cos(M_PI_2*0.2500) */
    293 #define WR2500      (PICOFFTSG_FFTTYPE)0.923879532511286756128183189396788286822416625863
    294 #endif
    295 #ifndef WI2500  /* sin(M_PI_2*0.2500) */
    296 #define WI2500      (PICOFFTSG_FFTTYPE)0.382683432365089771728459984030398866761344562485
    297 #endif
    298 #ifndef WR1250  /* cos(M_PI_2*0.1250) */
    299 #define WR1250      (PICOFFTSG_FFTTYPE)0.980785280403230449126182236134239036973933730893
    300 #endif
    301 #ifndef WI1250  /* sin(M_PI_2*0.1250) */
    302 #define WI1250      (PICOFFTSG_FFTTYPE)0.195090322016128267848284868477022240927691617751
    303 #endif
    304 #ifndef WR3750  /* cos(M_PI_2*0.3750) */
    305 #define WR3750      (PICOFFTSG_FFTTYPE)0.831469612302545237078788377617905756738560811987
    306 #endif
    307 #ifndef WI3750  /* sin(M_PI_2*0.3750) */
    308 #define WI3750      (PICOFFTSG_FFTTYPE)0.555570233019602224742830813948532874374937190754
    309 #endif
    310 
    311 #endif
    312 
    313 #ifndef CDFT_LOOP_DIV  /* control of the CDFT's speed & tolerance */
    314 #define CDFT_LOOP_DIV 32
    315 #define CDFT_LOOP_DIV_4 128
    316 #endif
    317 
    318 #ifndef RDFT_LOOP_DIV  /* control of the RDFT's speed & tolerance */
    319 #define RDFT_LOOP_DIV 64
    320 #define RDFT_LOOP_DIV4 256
    321 #define RDFT_LOOP_DIV_4 256
    322 #endif
    323 
    324 #ifndef DCST_LOOP_DIV  /* control of the DCT,DST's speed & tolerance */
    325 #define DCST_LOOP_DIV 64
    326 #define DCST_LOOP_DIV2 128
    327 #endif
    328 
    329 
    330 #define POW1 (0x1)
    331 #define POW2 (0x2)
    332 #define POW3 (0x4)
    333 #define POW4 (0x8)
    334 #define POW5 (0x10)
    335 #define POW6 (0x20)
    336 #define POW7 (0x40)
    337 #define POW8 (0x80)
    338 #define POW9 (0x100)
    339 #define POW10 (0x200)
    340 #define POW11 (0x400)
    341 #define POW12 (0x800)
    342 #define POW13 (0x1000)
    343 #define POW14 (0x2000)
    344 #define POW15 (0x4000)
    345 #define POW16 (0x8000)
    346 #define POW17 (0x10000)
    347 #define POW18 (0x20000)
    348 #define POW19 (0x40000)
    349 #define POW20 (0x80000)
    350 #define POW21 (0x100000)
    351 #define POW22 (0x200000)
    352 #define POW23 (0x400000)
    353 #define POW24 (0x800000)
    354 #define POW25 (0x1000000)
    355 #define POW26 (0x2000000)
    356 #define POW27 (0x4000000)
    357 #define POW28 (0x8000000)
    358 #define POW29 (0x10000000)
    359 #define POW30 (0x20000000)
    360 #define POW31 (0x40000000)
    361 /**************
    362  * useful macros
    363  ************** */
    364 #define picofftsg_highestBitPos(x) (x==0?0:(x>=POW17?(x>=POW25?(x>=POW29?(x>=POW31?31:(x>=POW30?30:29)):(x>=POW27?(x>=POW28?28:27):(x>=POW26?26:25))):(x>=POW21?(x>=POW23?(x>=POW24?24:23):(x>=POW22?22:21)):(x>=POW19?(x>=POW20?20:19):(x>=POW18?18:17)))):(x>=POW9?(x>=POW13?(x>=POW15?(x>=POW16?16:15):(x>=POW14?14:13)):(x>=POW11?(x>=POW12?12:11):(x>=POW10?10:9))):(x>=POW5?(x>=POW7?(x>=POW8?8:7):(x>=POW6?6:5)):(x>=POW3?(x>=POW4?4:3):(x>=POW2?2:1))))))
    365 #define picofftsg_highestBit(x) (x==0?0:(x<0?(zz=-x,(zz>=POW17?(zz>=POW25?(zz>=POW29?(zz>=POW31?31:(zz>=POW30?30:29)):(zz>=POW27?(zz>=POW28?28:27):(zz>=POW26?26:25))):(zz>=POW21?(zz>=POW23?(zz>=POW24?24:23):(zz>=POW22?22:21)):(zz>=POW19?(zz>=POW20?20:19):(zz>=POW18?18:17)))):(zz>=POW9?(zz>=POW13?(zz>=POW15?(zz>=POW16?16:15):(zz>=POW14?14:13)):(zz>=POW11?(zz>=POW12?12:11):(zz>=POW10?10:9))):(zz>=POW5?(zz>=POW7?(zz>=POW8?8:7):(zz>=POW6?6:5)):(zz>=POW3?(zz>=POW4?4:3):(zz>=POW2?2:1)))))):(x>=POW17?(x>=POW25?(x>=POW29?(x>=POW31?31:(x>=POW30?30:29)):(x>=POW27?(x>=POW28?28:27):(x>=POW26?26:25))):(x>=POW21?(x>=POW23?(x>=POW24?24:23):(x>=POW22?22:21)):(x>=POW19?(x>=POW20?20:19):(x>=POW18?18:17)))):(x>=POW9?(x>=POW13?(x>=POW15?(x>=POW16?16:15):(x>=POW14?14:13)):(x>=POW11?(x>=POW12?12:11):(x>=POW10?10:9))):(x>=POW5?(x>=POW7?(x>=POW8?8:7):(x>=POW6?6:5)):(x>=POW3?(x>=POW4?4:3):(x>=POW2?2:1)))))))
    366 #define Mult_W_W picofftsg_mult_w_w
    367 
    368 
    369 /* ***********************************************************************************************/
    370 /* forward declarations */
    371 /* ***********************************************************************************************/
    372 static PICOFFTSG_FFTTYPE picofftsg_mult_w_w(PICOFFTSG_FFTTYPE x1, PICOFFTSG_FFTTYPE y1);
    373 static PICOFFTSG_FFTTYPE picofftsg_mult_w_a(PICOFFTSG_FFTTYPE x1, PICOFFTSG_FFTTYPE y1);
    374 
    375 
    376 static void cftfsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    377 static void cftbsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    378 static void rftfsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    379 static void rftbsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    380 
    381 static void cftfsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    382 static void cftbsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    383 static void rftfsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    384 static void rftbsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    385 static void dctsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    386 static void dctsub4(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    387 
    388 static void ddct(picoos_int32 n, picoos_int32 isgn, PICOFFTSG_FFTTYPE *a);
    389 static void bitrv1(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    390 
    391 static void bitrv2(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    392 static void bitrv216(PICOFFTSG_FFTTYPE *a);
    393 static void bitrv208(PICOFFTSG_FFTTYPE *a);
    394 static void cftmdl1(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    395 static void cftrec4(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    396 static void cftleaf(picoos_int32 n, picoos_int32 isplt, PICOFFTSG_FFTTYPE *a);
    397 static void cftfx41(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    398 static void cftf161(PICOFFTSG_FFTTYPE *a);
    399 static void cftf081(PICOFFTSG_FFTTYPE *a);
    400 static void cftf040(PICOFFTSG_FFTTYPE *a);
    401 static void cftx020(PICOFFTSG_FFTTYPE *a);
    402 
    403 void bitrv2conj(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    404 void bitrv216neg(PICOFFTSG_FFTTYPE *a);
    405 void bitrv208neg(PICOFFTSG_FFTTYPE *a);
    406 void cftb1st(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    407 void cftrec4(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    408 void cftleaf(picoos_int32 n, picoos_int32 isplt, PICOFFTSG_FFTTYPE *a);
    409 void cftfx41(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    410 void cftf161(PICOFFTSG_FFTTYPE *a);
    411 void cftf081(PICOFFTSG_FFTTYPE *a);
    412 void cftb040(PICOFFTSG_FFTTYPE *a);
    413 void cftx020(PICOFFTSG_FFTTYPE *a);
    414 
    415 static picoos_int32 cfttree(picoos_int32 n, picoos_int32 j, picoos_int32 k, PICOFFTSG_FFTTYPE *a);
    416 static void cftleaf(picoos_int32 n, picoos_int32 isplt, PICOFFTSG_FFTTYPE *a);
    417 static void cftmdl1(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    418 
    419 static void cftmdl1(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    420 static void cftmdl2(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    421 
    422 static void cftmdl1(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    423 static void cftmdl2(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
    424 static void cftf161(PICOFFTSG_FFTTYPE *a);
    425 static void cftf162(PICOFFTSG_FFTTYPE *a);
    426 static void cftf081(PICOFFTSG_FFTTYPE *a);
    427 static void cftf082(PICOFFTSG_FFTTYPE *a);
    428 
    429 static void cftf161(PICOFFTSG_FFTTYPE *a);
    430 static void cftf162(PICOFFTSG_FFTTYPE *a);
    431 static void cftf081(PICOFFTSG_FFTTYPE *a);
    432 static void cftf082(PICOFFTSG_FFTTYPE *a);
    433 
    434 /* ***********************************************************************************************/
    435 /* Exported functions */
    436 /* ***********************************************************************************************/
    437 void rdft(picoos_int32 n, picoos_int32 isgn, PICOFFTSG_FFTTYPE *a)
    438 {
    439     PICOFFTSG_FFTTYPE xi;
    440 
    441     if (isgn >= 0) {
    442         if (n > 4) {
    443             cftfsub(n, a);
    444             rftfsub(n, a);
    445         } else if (n == 4) {
    446             cftfsub(n, a);
    447         }
    448         xi = a[0] - a[1];
    449         a[0] += a[1];
    450         a[1] = xi;
    451     } else {
    452         a[1] =  (a[0] - a[1]) / 2;
    453         a[0] -= a[1];
    454         if (n > 4) {
    455             rftbsub(n, a);
    456             cftbsub(n, a);
    457         } else if (n == 4) {
    458             cftbsub(n, a);
    459         }
    460     }
    461 
    462 }
    463 
    464 
    465 picoos_single norm_result(picoos_int32 m2, PICOFFTSG_FFTTYPE *tmpX, PICOFFTSG_FFTTYPE *norm_window)
    466 {
    467     picoos_int16 nI;
    468     PICOFFTSG_FFTTYPE a,b, E;
    469 
    470     E = (picoos_int32)0;
    471     for (nI=0; nI<m2; nI++) {
    472         a = (norm_window[nI]>>18) * ((tmpX[nI]>0) ? tmpX[nI]>>11 : -((-tmpX[nI])>>11));
    473         tmpX[nI] = a;
    474         b = (a>=0?a:-a)  >> 18;
    475         E += (b*b);
    476     }
    477 
    478     if (E>0) {
    479         return (picoos_single)sqrt((double)E/16.0)/m2;
    480     }
    481     else {
    482         return 0.0;
    483     }
    484 }
    485 
    486 void ddct(picoos_int32 n, picoos_int32 isgn, PICOFFTSG_FFTTYPE *a)
    487 {
    488     picoos_int32 j;
    489     PICOFFTSG_FFTTYPE xr;
    490 
    491     if (isgn < 0) {
    492         xr = a[n - 1];
    493         for (j = n - 2; j >= 2; j -= 2) {
    494             a[j + 1] = a[j] - a[j - 1];
    495             a[j] += a[j - 1];
    496         }
    497         a[1] = a[0] - xr;
    498         a[0] += xr;
    499         if (n > 4) {
    500             rftbsub(n, a);
    501             cftbsub(n, a);
    502         } else if (n == 4) {
    503             cftbsub(n, a);
    504         }
    505     }
    506     if (n > 4) {
    507         dctsub(n, a);
    508     } else {
    509         dctsub4(n, a);
    510     }
    511     if (isgn >= 0) {
    512         if (n > 4) {
    513             cftfsub(n, a);
    514             rftfsub(n, a);
    515         } else if (n == 4) {
    516             cftfsub(n, a);
    517         }
    518 
    519 
    520         xr = a[0] - a[1];
    521         a[0] += a[1];
    522         for (j = 2; j < n; j += 2) {
    523             a[j - 1] = a[j] - a[j + 1];
    524             a[j] += a[j + 1];
    525         }
    526         a[n - 1] = xr;
    527     }
    528 }
    529 
    530 void dfct_nmf(picoos_int32 n, picoos_int32 *a)
    531 {
    532     picoos_int32 j, k, m, mh;
    533     PICOFFTSG_FFTTYPE xr, xi, yr, yi, an;
    534     PICOFFTSG_FFTTYPE *aj, *ak, *amj, *amk;
    535 
    536     m = n >> 1;
    537     for (j = 0; j < m; j++) {
    538         k = n - j;
    539         xr = a[j] + a[k];
    540         a[j] -= a[k];
    541         a[k] = xr;
    542     }
    543     an = a[n];
    544     while (m >= 2) {
    545         ddct(m, 1, a);
    546         if (m > 2) {
    547             bitrv1(m, a);
    548         }
    549         mh = m >> 1;
    550         xi = a[m];
    551         a[m] = a[0];
    552         a[0] = an - xi;
    553         an += xi;
    554         k = m-1;
    555         aj = a + 1; ak = a + k; amj = aj + m; amk = ak + m;
    556         for (j = 1; j < mh; j++, aj++, ak--, amj++, amk--) {
    557             xr = *amk;
    558             xi = *amj;
    559             yr = *aj;
    560             yi = *ak;
    561             *amj = yr;
    562             *amk = yi;
    563             *aj = xr - xi;
    564             *ak = xr + xi;
    565         }
    566         xr = *aj;
    567         *aj = *amj;
    568         *amj = xr;
    569 
    570         m = mh;
    571     }
    572 
    573     xi = a[1];
    574     a[1] = a[0];
    575     a[0] = an + xi;
    576     a[n] = an - xi;
    577     if (n > 2) {
    578         bitrv1(n, a);
    579     }
    580 
    581 }
    582 
    583 /* ***********************************************************************************************/
    584 /* internal routines */
    585 /* ***********************************************************************************************/
    586 /*
    587   mult two numbers which are guaranteed to be in the range -1..1
    588   shift right as little as possible before mult, and the rest after the mult
    589   Also, shift bigger number more - lose less accuracy
    590  */
    591 static PICOFFTSG_FFTTYPE picofftsg_mult_w_w(PICOFFTSG_FFTTYPE x1, PICOFFTSG_FFTTYPE y1)
    592 {
    593     PICOFFTSG_FFTTYPE x, y;
    594     x = x1>=0 ? x1>>15 : -((-x1)>>15);
    595     y = y1>=0 ? y1>>14 : -((-y1)>>14);
    596     return  x * y;
    597 }
    598 
    599 static PICOFFTSG_FFTTYPE picofftsg_mult_w_a(PICOFFTSG_FFTTYPE x1, PICOFFTSG_FFTTYPE y1)
    600 {
    601     PICOFFTSG_FFTTYPE x, y;
    602 
    603 
    604     x = x1>=0 ? x1>>15 : -((-x1)>>15);
    605     y = y1>=0 ? y1>>14 : -((-y1)>>14);
    606     return x * y;
    607 }
    608 
    609 static void cftfsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
    610 {
    611 
    612     if (n > 8) {
    613         if (n > 32) {
    614             cftmdl1(n, a);
    615                 if (n > 512) {
    616                     cftrec4(n, a);
    617                 } else if (n > 128) {
    618                     cftleaf(n, 1, a);
    619                 } else {
    620                     cftfx41(n, a);
    621                 }
    622             bitrv2(n, a);
    623         } else if (n == 32) {
    624             cftf161(a);
    625             bitrv216(a);
    626         } else {
    627             cftf081(a);
    628             bitrv208(a);
    629         }
    630     } else if (n == 8) {
    631         cftf040(a);
    632     } else if (n == 4) {
    633         cftx020(a);
    634     }
    635 }
    636 
    637 
    638 void cftbsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
    639 {
    640     if (n > 8) {
    641         if (n > 32) {
    642             cftb1st(n, a);
    643                 if (n > 512) {
    644                     cftrec4(n, a);
    645                 } else if (n > 128) {
    646                     cftleaf(n, 1, a);
    647                 } else {
    648                     cftfx41(n, a);
    649                 }
    650             bitrv2conj(n, a);
    651         } else if (n == 32) {
    652             cftf161(a);
    653             bitrv216neg(a);
    654         } else {
    655             cftf081(a);
    656             bitrv208neg(a);
    657         }
    658     } else if (n == 8) {
    659         cftb040(a);
    660     } else if (n == 4) {
    661         cftx020(a);
    662     }
    663 }
    664 
    665 /* **************************************************************************************************/
    666 
    667 /* **************************************************************************************************/
    668 void bitrv2(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
    669 {
    670     picoos_int32 j0, k0, j1, k1, l, m, i, j, k, nh, m2;
    671     PICOFFTSG_FFTTYPE xr, xi, yr, yi;
    672 
    673     m = 4;
    674     for (l = n >> 2; l > 8; l >>= 2) {
    675         m <<= 1;
    676     }
    677     m2 = m + m;
    678     nh = n >> 1;
    679     if (l == 8) {
    680         j0 = 0;
    681         for (k0 = 0; k0 < m; k0 += 4) {
    682             k = k0;
    683             for (j = j0; j < j0 + k0; j += 4) {
    684                 xr = a[j];
    685                 xi = a[j + 1];
    686                 yr = a[k];
    687                 yi = a[k + 1];
    688                 a[j] = yr;
    689                 a[j + 1] = yi;
    690                 a[k] = xr;
    691                 a[k + 1] = xi;
    692                 j1 = j + m;
    693                 k1 = k + m2;
    694                 xr = a[j1];
    695                 xi = a[j1 + 1];
    696                 yr = a[k1];
    697                 yi = a[k1 + 1];
    698                 a[j1] = yr;
    699                 a[j1 + 1] = yi;
    700                 a[k1] = xr;
    701                 a[k1 + 1] = xi;
    702                 j1 += m;
    703                 k1 -= m;
    704                 xr = a[j1];
    705                 xi = a[j1 + 1];
    706                 yr = a[k1];
    707                 yi = a[k1 + 1];
    708                 a[j1] = yr;
    709                 a[j1 + 1] = yi;
    710                 a[k1] = xr;
    711                 a[k1 + 1] = xi;
    712                 j1 += m;
    713                 k1 += m2;
    714                 xr = a[j1];
    715                 xi = a[j1 + 1];
    716                 yr = a[k1];
    717                 yi = a[k1 + 1];
    718                 a[j1] = yr;
    719                 a[j1 + 1] = yi;
    720                 a[k1] = xr;
    721                 a[k1 + 1] = xi;
    722                 j1 += nh;
    723                 k1 += 2;
    724                 xr = a[j1];
    725                 xi = a[j1 + 1];
    726                 yr = a[k1];
    727                 yi = a[k1 + 1];
    728                 a[j1] = yr;
    729                 a[j1 + 1] = yi;
    730                 a[k1] = xr;
    731                 a[k1 + 1] = xi;
    732                 j1 -= m;
    733                 k1 -= m2;
    734                 xr = a[j1];
    735                 xi = a[j1 + 1];
    736                 yr = a[k1];
    737                 yi = a[k1 + 1];
    738                 a[j1] = yr;
    739                 a[j1 + 1] = yi;
    740                 a[k1] = xr;
    741                 a[k1 + 1] = xi;
    742                 j1 -= m;
    743                 k1 += m;
    744                 xr = a[j1];
    745                 xi = a[j1 + 1];
    746                 yr = a[k1];
    747                 yi = a[k1 + 1];
    748                 a[j1] = yr;
    749                 a[j1 + 1] = yi;
    750                 a[k1] = xr;
    751                 a[k1 + 1] = xi;
    752                 j1 -= m;
    753                 k1 -= m2;
    754                 xr = a[j1];
    755                 xi = a[j1 + 1];
    756                 yr = a[k1];
    757                 yi = a[k1 + 1];
    758                 a[j1] = yr;
    759                 a[j1 + 1] = yi;
    760                 a[k1] = xr;
    761                 a[k1 + 1] = xi;
    762                 j1 += 2;
    763                 k1 += nh;
    764                 xr = a[j1];
    765                 xi = a[j1 + 1];
    766                 yr = a[k1];
    767                 yi = a[k1 + 1];
    768                 a[j1] = yr;
    769                 a[j1 + 1] = yi;
    770                 a[k1] = xr;
    771                 a[k1 + 1] = xi;
    772                 j1 += m;
    773                 k1 += m2;
    774                 xr = a[j1];
    775                 xi = a[j1 + 1];
    776                 yr = a[k1];
    777                 yi = a[k1 + 1];
    778                 a[j1] = yr;
    779                 a[j1 + 1] = yi;
    780                 a[k1] = xr;
    781                 a[k1 + 1] = xi;
    782                 j1 += m;
    783                 k1 -= m;
    784                 xr = a[j1];
    785                 xi = a[j1 + 1];
    786                 yr = a[k1];
    787                 yi = a[k1 + 1];
    788                 a[j1] = yr;
    789                 a[j1 + 1] = yi;
    790                 a[k1] = xr;
    791                 a[k1 + 1] = xi;
    792                 j1 += m;
    793                 k1 += m2;
    794                 xr = a[j1];
    795                 xi = a[j1 + 1];
    796                 yr = a[k1];
    797                 yi = a[k1 + 1];
    798                 a[j1] = yr;
    799                 a[j1 + 1] = yi;
    800                 a[k1] = xr;
    801                 a[k1 + 1] = xi;
    802                 j1 -= nh;
    803                 k1 -= 2;
    804                 xr = a[j1];
    805                 xi = a[j1 + 1];
    806                 yr = a[k1];
    807                 yi = a[k1 + 1];
    808                 a[j1] = yr;
    809                 a[j1 + 1] = yi;
    810                 a[k1] = xr;
    811                 a[k1 + 1] = xi;
    812                 j1 -= m;
    813                 k1 -= m2;
    814                 xr = a[j1];
    815                 xi = a[j1 + 1];
    816                 yr = a[k1];
    817                 yi = a[k1 + 1];
    818                 a[j1] = yr;
    819                 a[j1 + 1] = yi;
    820                 a[k1] = xr;
    821                 a[k1 + 1] = xi;
    822                 j1 -= m;
    823                 k1 += m;
    824                 xr = a[j1];
    825                 xi = a[j1 + 1];
    826                 yr = a[k1];
    827                 yi = a[k1 + 1];
    828                 a[j1] = yr;
    829                 a[j1 + 1] = yi;
    830                 a[k1] = xr;
    831                 a[k1 + 1] = xi;
    832                 j1 -= m;
    833                 k1 -= m2;
    834                 xr = a[j1];
    835                 xi = a[j1 + 1];
    836                 yr = a[k1];
    837                 yi = a[k1 + 1];
    838                 a[j1] = yr;
    839                 a[j1 + 1] = yi;
    840                 a[k1] = xr;
    841                 a[k1 + 1] = xi;
    842                 for (i = nh >> 1; i > (k ^= i); i >>= 1) {
    843                     /* Avoid warning*/
    844                 };
    845             }
    846             k1 = j0 + k0;
    847             j1 = k1 + 2;
    848             k1 += nh;
    849             xr = a[j1];
    850             xi = a[j1 + 1];
    851             yr = a[k1];
    852             yi = a[k1 + 1];
    853             a[j1] = yr;
    854             a[j1 + 1] = yi;
    855             a[k1] = xr;
    856             a[k1 + 1] = xi;
    857             j1 += m;
    858             k1 += m2;
    859             xr = a[j1];
    860             xi = a[j1 + 1];
    861             yr = a[k1];
    862             yi = a[k1 + 1];
    863             a[j1] = yr;
    864             a[j1 + 1] = yi;
    865             a[k1] = xr;
    866             a[k1 + 1] = xi;
    867             j1 += m;
    868             k1 -= m;
    869             xr = a[j1];
    870             xi = a[j1 + 1];
    871             yr = a[k1];
    872             yi = a[k1 + 1];
    873             a[j1] = yr;
    874             a[j1 + 1] = yi;
    875             a[k1] = xr;
    876             a[k1 + 1] = xi;
    877             j1 -= 2;
    878             k1 -= nh;
    879             xr = a[j1];
    880             xi = a[j1 + 1];
    881             yr = a[k1];
    882             yi = a[k1 + 1];
    883             a[j1] = yr;
    884             a[j1 + 1] = yi;
    885             a[k1] = xr;
    886             a[k1 + 1] = xi;
    887             j1 += nh + 2;
    888             k1 += nh + 2;
    889             xr = a[j1];
    890             xi = a[j1 + 1];
    891             yr = a[k1];
    892             yi = a[k1 + 1];
    893             a[j1] = yr;
    894             a[j1 + 1] = yi;
    895             a[k1] = xr;
    896             a[k1 + 1] = xi;
    897             j1 -= nh - m;
    898             k1 += m2 - 2;
    899             xr = a[j1];
    900             xi = a[j1 + 1];
    901             yr = a[k1];
    902             yi = a[k1 + 1];
    903             a[j1] = yr;
    904             a[j1 + 1] = yi;
    905             a[k1] = xr;
    906             a[k1 + 1] = xi;
    907             for (i = nh >> 1; i > (j0 ^= i); i >>= 1) {
    908                 /* Avoid warning */
    909             }
    910         }
    911     } else {
    912         j0 = 0;
    913         for (k0 = 0; k0 < m; k0 += 4) {
    914             k = k0;
    915             for (j = j0; j < j0 + k0; j += 4) {
    916                 xr = a[j];
    917                 xi = a[j + 1];
    918                 yr = a[k];
    919                 yi = a[k + 1];
    920                 a[j] = yr;
    921                 a[j + 1] = yi;
    922                 a[k] = xr;
    923                 a[k + 1] = xi;
    924                 j1 = j + m;
    925                 k1 = k + m;
    926                 xr = a[j1];
    927                 xi = a[j1 + 1];
    928                 yr = a[k1];
    929                 yi = a[k1 + 1];
    930                 a[j1] = yr;
    931                 a[j1 + 1] = yi;
    932                 a[k1] = xr;
    933                 a[k1 + 1] = xi;
    934                 j1 += nh;
    935                 k1 += 2;
    936                 xr = a[j1];
    937                 xi = a[j1 + 1];
    938                 yr = a[k1];
    939                 yi = a[k1 + 1];
    940                 a[j1] = yr;
    941                 a[j1 + 1] = yi;
    942                 a[k1] = xr;
    943                 a[k1 + 1] = xi;
    944                 j1 -= m;
    945                 k1 -= m;
    946                 xr = a[j1];
    947                 xi = a[j1 + 1];
    948                 yr = a[k1];
    949                 yi = a[k1 + 1];
    950                 a[j1] = yr;
    951                 a[j1 + 1] = yi;
    952                 a[k1] = xr;
    953                 a[k1 + 1] = xi;
    954                 j1 += 2;
    955                 k1 += nh;
    956                 xr = a[j1];
    957                 xi = a[j1 + 1];
    958                 yr = a[k1];
    959                 yi = a[k1 + 1];
    960                 a[j1] = yr;
    961                 a[j1 + 1] = yi;
    962                 a[k1] = xr;
    963                 a[k1 + 1] = xi;
    964                 j1 += m;
    965                 k1 += m;
    966                 xr = a[j1];
    967                 xi = a[j1 + 1];
    968                 yr = a[k1];
    969                 yi = a[k1 + 1];
    970                 a[j1] = yr;
    971                 a[j1 + 1] = yi;
    972                 a[k1] = xr;
    973                 a[k1 + 1] = xi;
    974                 j1 -= nh;
    975                 k1 -= 2;
    976                 xr = a[j1];
    977                 xi = a[j1 + 1];
    978                 yr = a[k1];
    979                 yi = a[k1 + 1];
    980                 a[j1] = yr;
    981                 a[j1 + 1] = yi;
    982                 a[k1] = xr;
    983                 a[k1 + 1] = xi;
    984                 j1 -= m;
    985                 k1 -= m;
    986                 xr = a[j1];
    987                 xi = a[j1 + 1];
    988                 yr = a[k1];
    989                 yi = a[k1 + 1];
    990                 a[j1] = yr;
    991                 a[j1 + 1] = yi;
    992                 a[k1] = xr;
    993                 a[k1 + 1] = xi;
    994                 for (i = nh >> 1; i > (k ^= i); i >>= 1){
    995                     /* Avoid warning */
    996                 }
    997             }
    998             k1 = j0 + k0;
    999             j1 = k1 + 2;
   1000             k1 += nh;
   1001             xr = a[j1];
   1002             xi = a[j1 + 1];
   1003             yr = a[k1];
   1004             yi = a[k1 + 1];
   1005             a[j1] = yr;
   1006             a[j1 + 1] = yi;
   1007             a[k1] = xr;
   1008             a[k1 + 1] = xi;
   1009             j1 += m;
   1010             k1 += m;
   1011             xr = a[j1];
   1012             xi = a[j1 + 1];
   1013             yr = a[k1];
   1014             yi = a[k1 + 1];
   1015             a[j1] = yr;
   1016             a[j1 + 1] = yi;
   1017             a[k1] = xr;
   1018             a[k1 + 1] = xi;
   1019             for (i = nh >> 1; i > (j0 ^= i); i >>= 1){
   1020                 /* Avoid  warning */
   1021             }
   1022         }
   1023     }
   1024 }
   1025 
   1026 
   1027 void bitrv2conj(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
   1028 {
   1029     picoos_int32 j0, k0, j1, k1, l, m, i, j, k, nh, m2;
   1030     PICOFFTSG_FFTTYPE xr, xi, yr, yi;
   1031 
   1032 
   1033     m = 4;
   1034     for (l = n >> 2; l > 8; l >>= 2) {
   1035         m <<= 1;
   1036     }
   1037     m2 = m + m;
   1038     nh = n >> 1;
   1039     if (l == 8) {
   1040         j0 = 0;
   1041         for (k0 = 0; k0 < m; k0 += 4) {
   1042             k = k0;
   1043             for (j = j0; j < j0 + k0; j += 4) {
   1044                 xr = a[j];
   1045                 xi = -a[j + 1];
   1046                 yr = a[k];
   1047                 yi = -a[k + 1];
   1048                 a[j] = yr;
   1049                 a[j + 1] = yi;
   1050                 a[k] = xr;
   1051                 a[k + 1] = xi;
   1052                 j1 = j + m;
   1053                 k1 = k + m2;
   1054                 xr = a[j1];
   1055                 xi = -a[j1 + 1];
   1056                 yr = a[k1];
   1057                 yi = -a[k1 + 1];
   1058                 a[j1] = yr;
   1059                 a[j1 + 1] = yi;
   1060                 a[k1] = xr;
   1061                 a[k1 + 1] = xi;
   1062                 j1 += m;
   1063                 k1 -= m;
   1064                 xr = a[j1];
   1065                 xi = -a[j1 + 1];
   1066                 yr = a[k1];
   1067                 yi = -a[k1 + 1];
   1068                 a[j1] = yr;
   1069                 a[j1 + 1] = yi;
   1070                 a[k1] = xr;
   1071                 a[k1 + 1] = xi;
   1072                 j1 += m;
   1073                 k1 += m2;
   1074                 xr = a[j1];
   1075                 xi = -a[j1 + 1];
   1076                 yr = a[k1];
   1077                 yi = -a[k1 + 1];
   1078                 a[j1] = yr;
   1079                 a[j1 + 1] = yi;
   1080                 a[k1] = xr;
   1081                 a[k1 + 1] = xi;
   1082                 j1 += nh;
   1083                 k1 += 2;
   1084                 xr = a[j1];
   1085                 xi = -a[j1 + 1];
   1086                 yr = a[k1];
   1087                 yi = -a[k1 + 1];
   1088                 a[j1] = yr;
   1089                 a[j1 + 1] = yi;
   1090                 a[k1] = xr;
   1091                 a[k1 + 1] = xi;
   1092                 j1 -= m;
   1093                 k1 -= m2;
   1094                 xr = a[j1];
   1095                 xi = -a[j1 + 1];
   1096                 yr = a[k1];
   1097                 yi = -a[k1 + 1];
   1098                 a[j1] = yr;
   1099                 a[j1 + 1] = yi;
   1100                 a[k1] = xr;
   1101                 a[k1 + 1] = xi;
   1102                 j1 -= m;
   1103                 k1 += m;
   1104                 xr = a[j1];
   1105                 xi = -a[j1 + 1];
   1106                 yr = a[k1];
   1107                 yi = -a[k1 + 1];
   1108                 a[j1] = yr;
   1109                 a[j1 + 1] = yi;
   1110                 a[k1] = xr;
   1111                 a[k1 + 1] = xi;
   1112                 j1 -= m;
   1113                 k1 -= m2;
   1114                 xr = a[j1];
   1115                 xi = -a[j1 + 1];
   1116                 yr = a[k1];
   1117                 yi = -a[k1 + 1];
   1118                 a[j1] = yr;
   1119                 a[j1 + 1] = yi;
   1120                 a[k1] = xr;
   1121                 a[k1 + 1] = xi;
   1122                 j1 += 2;
   1123                 k1 += nh;
   1124                 xr = a[j1];
   1125                 xi = -a[j1 + 1];
   1126                 yr = a[k1];
   1127                 yi = -a[k1 + 1];
   1128                 a[j1] = yr;
   1129                 a[j1 + 1] = yi;
   1130                 a[k1] = xr;
   1131                 a[k1 + 1] = xi;
   1132                 j1 += m;
   1133                 k1 += m2;
   1134                 xr = a[j1];
   1135                 xi = -a[j1 + 1];
   1136                 yr = a[k1];
   1137                 yi = -a[k1 + 1];
   1138                 a[j1] = yr;
   1139                 a[j1 + 1] = yi;
   1140                 a[k1] = xr;
   1141                 a[k1 + 1] = xi;
   1142                 j1 += m;
   1143                 k1 -= m;
   1144                 xr = a[j1];
   1145                 xi = -a[j1 + 1];
   1146                 yr = a[k1];
   1147                 yi = -a[k1 + 1];
   1148                 a[j1] = yr;
   1149                 a[j1 + 1] = yi;
   1150                 a[k1] = xr;
   1151                 a[k1 + 1] = xi;
   1152                 j1 += m;
   1153                 k1 += m2;
   1154                 xr = a[j1];
   1155                 xi = -a[j1 + 1];
   1156                 yr = a[k1];
   1157                 yi = -a[k1 + 1];
   1158                 a[j1] = yr;
   1159                 a[j1 + 1] = yi;
   1160                 a[k1] = xr;
   1161                 a[k1 + 1] = xi;
   1162                 j1 -= nh;
   1163                 k1 -= 2;
   1164                 xr = a[j1];
   1165                 xi = -a[j1 + 1];
   1166                 yr = a[k1];
   1167                 yi = -a[k1 + 1];
   1168                 a[j1] = yr;
   1169                 a[j1 + 1] = yi;
   1170                 a[k1] = xr;
   1171                 a[k1 + 1] = xi;
   1172                 j1 -= m;
   1173                 k1 -= m2;
   1174                 xr = a[j1];
   1175                 xi = -a[j1 + 1];
   1176                 yr = a[k1];
   1177                 yi = -a[k1 + 1];
   1178                 a[j1] = yr;
   1179                 a[j1 + 1] = yi;
   1180                 a[k1] = xr;
   1181                 a[k1 + 1] = xi;
   1182                 j1 -= m;
   1183                 k1 += m;
   1184                 xr = a[j1];
   1185                 xi = -a[j1 + 1];
   1186                 yr = a[k1];
   1187                 yi = -a[k1 + 1];
   1188                 a[j1] = yr;
   1189                 a[j1 + 1] = yi;
   1190                 a[k1] = xr;
   1191                 a[k1 + 1] = xi;
   1192                 j1 -= m;
   1193                 k1 -= m2;
   1194                 xr = a[j1];
   1195                 xi = -a[j1 + 1];
   1196                 yr = a[k1];
   1197                 yi = -a[k1 + 1];
   1198                 a[j1] = yr;
   1199                 a[j1 + 1] = yi;
   1200                 a[k1] = xr;
   1201                 a[k1 + 1] = xi;
   1202                 for (i = nh >> 1; i > (k ^= i); i >>= 1) {
   1203                     /* Avoid warning */
   1204                 }
   1205             }
   1206             k1 = j0 + k0;
   1207             j1 = k1 + 2;
   1208             k1 += nh;
   1209             a[j1 - 1] = -a[j1 - 1];
   1210             xr = a[j1];
   1211             xi = -a[j1 + 1];
   1212             yr = a[k1];
   1213             yi = -a[k1 + 1];
   1214             a[j1] = yr;
   1215             a[j1 + 1] = yi;
   1216             a[k1] = xr;
   1217             a[k1 + 1] = xi;
   1218             a[k1 + 3] = -a[k1 + 3];
   1219             j1 += m;
   1220             k1 += m2;
   1221             xr = a[j1];
   1222             xi = -a[j1 + 1];
   1223             yr = a[k1];
   1224             yi = -a[k1 + 1];
   1225             a[j1] = yr;
   1226             a[j1 + 1] = yi;
   1227             a[k1] = xr;
   1228             a[k1 + 1] = xi;
   1229             j1 += m;
   1230             k1 -= m;
   1231             xr = a[j1];
   1232             xi = -a[j1 + 1];
   1233             yr = a[k1];
   1234             yi = -a[k1 + 1];
   1235             a[j1] = yr;
   1236             a[j1 + 1] = yi;
   1237             a[k1] = xr;
   1238             a[k1 + 1] = xi;
   1239             j1 -= 2;
   1240             k1 -= nh;
   1241             xr = a[j1];
   1242             xi = -a[j1 + 1];
   1243             yr = a[k1];
   1244             yi = -a[k1 + 1];
   1245             a[j1] = yr;
   1246             a[j1 + 1] = yi;
   1247             a[k1] = xr;
   1248             a[k1 + 1] = xi;
   1249             j1 += nh + 2;
   1250             k1 += nh + 2;
   1251             xr = a[j1];
   1252             xi = -a[j1 + 1];
   1253             yr = a[k1];
   1254             yi = -a[k1 + 1];
   1255             a[j1] = yr;
   1256             a[j1 + 1] = yi;
   1257             a[k1] = xr;
   1258             a[k1 + 1] = xi;
   1259             j1 -= nh - m;
   1260             k1 += m2 - 2;
   1261             a[j1 - 1] = -a[j1 - 1];
   1262             xr = a[j1];
   1263             xi = -a[j1 + 1];
   1264             yr = a[k1];
   1265             yi = -a[k1 + 1];
   1266             a[j1] = yr;
   1267             a[j1 + 1] = yi;
   1268             a[k1] = xr;
   1269             a[k1 + 1] = xi;
   1270             a[k1 + 3] = -a[k1 + 3];
   1271             for (i = nh >> 1; i > (j0 ^= i); i >>= 1) { /* Avoid warning*/
   1272 
   1273             }
   1274         }
   1275     } else {
   1276         j0 = 0;
   1277         for (k0 = 0; k0 < m; k0 += 4) {
   1278             k = k0;
   1279             for (j = j0; j < j0 + k0; j += 4) {
   1280                 xr = a[j];
   1281                 xi = -a[j + 1];
   1282                 yr = a[k];
   1283                 yi = -a[k + 1];
   1284                 a[j] = yr;
   1285                 a[j + 1] = yi;
   1286                 a[k] = xr;
   1287                 a[k + 1] = xi;
   1288                 j1 = j + m;
   1289                 k1 = k + m;
   1290                 xr = a[j1];
   1291                 xi = -a[j1 + 1];
   1292                 yr = a[k1];
   1293                 yi = -a[k1 + 1];
   1294                 a[j1] = yr;
   1295                 a[j1 + 1] = yi;
   1296                 a[k1] = xr;
   1297                 a[k1 + 1] = xi;
   1298                 j1 += nh;
   1299                 k1 += 2;
   1300                 xr = a[j1];
   1301                 xi = -a[j1 + 1];
   1302                 yr = a[k1];
   1303                 yi = -a[k1 + 1];
   1304                 a[j1] = yr;
   1305                 a[j1 + 1] = yi;
   1306                 a[k1] = xr;
   1307                 a[k1 + 1] = xi;
   1308                 j1 -= m;
   1309                 k1 -= m;
   1310                 xr = a[j1];
   1311                 xi = -a[j1 + 1];
   1312                 yr = a[k1];
   1313                 yi = -a[k1 + 1];
   1314                 a[j1] = yr;
   1315                 a[j1 + 1] = yi;
   1316                 a[k1] = xr;
   1317                 a[k1 + 1] = xi;
   1318                 j1 += 2;
   1319                 k1 += nh;
   1320                 xr = a[j1];
   1321                 xi = -a[j1 + 1];
   1322                 yr = a[k1];
   1323                 yi = -a[k1 + 1];
   1324                 a[j1] = yr;
   1325                 a[j1 + 1] = yi;
   1326                 a[k1] = xr;
   1327                 a[k1 + 1] = xi;
   1328                 j1 += m;
   1329                 k1 += m;
   1330                 xr = a[j1];
   1331                 xi = -a[j1 + 1];
   1332                 yr = a[k1];
   1333                 yi = -a[k1 + 1];
   1334                 a[j1] = yr;
   1335                 a[j1 + 1] = yi;
   1336                 a[k1] = xr;
   1337                 a[k1 + 1] = xi;
   1338                 j1 -= nh;
   1339                 k1 -= 2;
   1340                 xr = a[j1];
   1341                 xi = -a[j1 + 1];
   1342                 yr = a[k1];
   1343                 yi = -a[k1 + 1];
   1344                 a[j1] = yr;
   1345                 a[j1 + 1] = yi;
   1346                 a[k1] = xr;
   1347                 a[k1 + 1] = xi;
   1348                 j1 -= m;
   1349                 k1 -= m;
   1350                 xr = a[j1];
   1351                 xi = -a[j1 + 1];
   1352                 yr = a[k1];
   1353                 yi = -a[k1 + 1];
   1354                 a[j1] = yr;
   1355                 a[j1 + 1] = yi;
   1356                 a[k1] = xr;
   1357                 a[k1 + 1] = xi;
   1358                 for (i = nh >> 1; i > (k ^= i); i >>= 1) {
   1359                     /* Avoid warning*/
   1360                 }
   1361             }
   1362             k1 = j0 + k0;
   1363             j1 = k1 + 2;
   1364             k1 += nh;
   1365             a[j1 - 1] = -a[j1 - 1];
   1366             xr = a[j1];
   1367             xi = -a[j1 + 1];
   1368             yr = a[k1];
   1369             yi = -a[k1 + 1];
   1370             a[j1] = yr;
   1371             a[j1 + 1] = yi;
   1372             a[k1] = xr;
   1373             a[k1 + 1] = xi;
   1374             a[k1 + 3] = -a[k1 + 3];
   1375             j1 += m;
   1376             k1 += m;
   1377             a[j1 - 1] = -a[j1 - 1];
   1378             xr = a[j1];
   1379             xi = -a[j1 + 1];
   1380             yr = a[k1];
   1381             yi = -a[k1 + 1];
   1382             a[j1] = yr;
   1383             a[j1 + 1] = yi;
   1384             a[k1] = xr;
   1385             a[k1 + 1] = xi;
   1386             a[k1 + 3] = -a[k1 + 3];
   1387             for (i = nh >> 1; i > (j0 ^= i); i >>= 1) {
   1388                 /* Avoid warning*/
   1389             }
   1390         }
   1391     }
   1392 }
   1393 
   1394 
   1395 void bitrv216(PICOFFTSG_FFTTYPE *a)
   1396 {
   1397     PICOFFTSG_FFTTYPE x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
   1398         x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i,
   1399         x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i;
   1400 
   1401     x1r = a[2];
   1402     x1i = a[3];
   1403     x2r = a[4];
   1404     x2i = a[5];
   1405     x3r = a[6];
   1406     x3i = a[7];
   1407     x4r = a[8];
   1408     x4i = a[9];
   1409     x5r = a[10];
   1410     x5i = a[11];
   1411     x7r = a[14];
   1412     x7i = a[15];
   1413     x8r = a[16];
   1414     x8i = a[17];
   1415     x10r = a[20];
   1416     x10i = a[21];
   1417     x11r = a[22];
   1418     x11i = a[23];
   1419     x12r = a[24];
   1420     x12i = a[25];
   1421     x13r = a[26];
   1422     x13i = a[27];
   1423     x14r = a[28];
   1424     x14i = a[29];
   1425     a[2] = x8r;
   1426     a[3] = x8i;
   1427     a[4] = x4r;
   1428     a[5] = x4i;
   1429     a[6] = x12r;
   1430     a[7] = x12i;
   1431     a[8] = x2r;
   1432     a[9] = x2i;
   1433     a[10] = x10r;
   1434     a[11] = x10i;
   1435     a[14] = x14r;
   1436     a[15] = x14i;
   1437     a[16] = x1r;
   1438     a[17] = x1i;
   1439     a[20] = x5r;
   1440     a[21] = x5i;
   1441     a[22] = x13r;
   1442     a[23] = x13i;
   1443     a[24] = x3r;
   1444     a[25] = x3i;
   1445     a[26] = x11r;
   1446     a[27] = x11i;
   1447     a[28] = x7r;
   1448     a[29] = x7i;
   1449 }
   1450 
   1451 
   1452 void bitrv216neg(PICOFFTSG_FFTTYPE *a)
   1453 {
   1454     PICOFFTSG_FFTTYPE x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
   1455         x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i,
   1456         x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i,
   1457         x13r, x13i, x14r, x14i, x15r, x15i;
   1458 
   1459     x1r = a[2];
   1460     x1i = a[3];
   1461     x2r = a[4];
   1462     x2i = a[5];
   1463     x3r = a[6];
   1464     x3i = a[7];
   1465     x4r = a[8];
   1466     x4i = a[9];
   1467     x5r = a[10];
   1468     x5i = a[11];
   1469     x6r = a[12];
   1470     x6i = a[13];
   1471     x7r = a[14];
   1472     x7i = a[15];
   1473     x8r = a[16];
   1474     x8i = a[17];
   1475     x9r = a[18];
   1476     x9i = a[19];
   1477     x10r = a[20];
   1478     x10i = a[21];
   1479     x11r = a[22];
   1480     x11i = a[23];
   1481     x12r = a[24];
   1482     x12i = a[25];
   1483     x13r = a[26];
   1484     x13i = a[27];
   1485     x14r = a[28];
   1486     x14i = a[29];
   1487     x15r = a[30];
   1488     x15i = a[31];
   1489     a[2] = x15r;
   1490     a[3] = x15i;
   1491     a[4] = x7r;
   1492     a[5] = x7i;
   1493     a[6] = x11r;
   1494     a[7] = x11i;
   1495     a[8] = x3r;
   1496     a[9] = x3i;
   1497     a[10] = x13r;
   1498     a[11] = x13i;
   1499     a[12] = x5r;
   1500     a[13] = x5i;
   1501     a[14] = x9r;
   1502     a[15] = x9i;
   1503     a[16] = x1r;
   1504     a[17] = x1i;
   1505     a[18] = x14r;
   1506     a[19] = x14i;
   1507     a[20] = x6r;
   1508     a[21] = x6i;
   1509     a[22] = x10r;
   1510     a[23] = x10i;
   1511     a[24] = x2r;
   1512     a[25] = x2i;
   1513     a[26] = x12r;
   1514     a[27] = x12i;
   1515     a[28] = x4r;
   1516     a[29] = x4i;
   1517     a[30] = x8r;
   1518     a[31] = x8i;
   1519 }
   1520 
   1521 
   1522 void bitrv208(PICOFFTSG_FFTTYPE *a)
   1523 {
   1524     PICOFFTSG_FFTTYPE x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i;
   1525 
   1526     x1r = a[2];
   1527     x1i = a[3];
   1528     x3r = a[6];
   1529     x3i = a[7];
   1530     x4r = a[8];
   1531     x4i = a[9];
   1532     x6r = a[12];
   1533     x6i = a[13];
   1534     a[2] = x4r;
   1535     a[3] = x4i;
   1536     a[6] = x6r;
   1537     a[7] = x6i;
   1538     a[8] = x1r;
   1539     a[9] = x1i;
   1540     a[12] = x3r;
   1541     a[13] = x3i;
   1542 }
   1543 
   1544 
   1545 void bitrv208neg(PICOFFTSG_FFTTYPE *a)
   1546 {
   1547     PICOFFTSG_FFTTYPE x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
   1548         x5r, x5i, x6r, x6i, x7r, x7i;
   1549 
   1550     x1r = a[2];
   1551     x1i = a[3];
   1552     x2r = a[4];
   1553     x2i = a[5];
   1554     x3r = a[6];
   1555     x3i = a[7];
   1556     x4r = a[8];
   1557     x4i = a[9];
   1558     x5r = a[10];
   1559     x5i = a[11];
   1560     x6r = a[12];
   1561     x6i = a[13];
   1562     x7r = a[14];
   1563     x7i = a[15];
   1564     a[2] = x7r;
   1565     a[3] = x7i;
   1566     a[4] = x3r;
   1567     a[5] = x3i;
   1568     a[6] = x5r;
   1569     a[7] = x5i;
   1570     a[8] = x1r;
   1571     a[9] = x1i;
   1572     a[10] = x6r;
   1573     a[11] = x6i;
   1574     a[12] = x2r;
   1575     a[13] = x2i;
   1576     a[14] = x4r;
   1577     a[15] = x4i;
   1578 }
   1579 
   1580 
   1581 void bitrv1(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
   1582 {
   1583     picoos_int32 j0, k0, j1, k1, l, m, i, j, k, nh;
   1584     PICOFFTSG_FFTTYPE x;
   1585     nh = n >> 1;
   1586     x = a[1];
   1587     a[1] = a[nh];
   1588     a[nh] = x;
   1589     m = 2;
   1590     for (l = n >> 2; l > 2; l >>= 2) {
   1591         m <<= 1;
   1592     }
   1593     if (l == 2) {
   1594         j1 = m + 1;
   1595         k1 = m + nh;
   1596         x = a[j1];
   1597         a[j1] = a[k1];
   1598         a[k1] = x;
   1599         j0 = 0;
   1600         for (k0 = 2; k0 < m; k0 += 2) {
   1601             for (i = nh >> 1; i > (j0 ^= i); i >>= 1) {
   1602                 /* Avoid warning*/
   1603             }
   1604             k = k0;
   1605             for (j = j0; j < j0 + k0; j += 2) {
   1606                 x = a[j];
   1607                 a[j] = a[k];
   1608                 a[k] = x;
   1609                 j1 = j + m;
   1610                 k1 = k + m;
   1611                 x = a[j1];
   1612                 a[j1] = a[k1];
   1613                 a[k1] = x;
   1614                 j1 += nh;
   1615                 k1++;
   1616                 x = a[j1];
   1617                 a[j1] = a[k1];
   1618                 a[k1] = x;
   1619                 j1 -= m;
   1620                 k1 -= m;
   1621                 x = a[j1];
   1622                 a[j1] = a[k1];
   1623                 a[k1] = x;
   1624                 j1++;
   1625                 k1 += nh;
   1626                 x = a[j1];
   1627                 a[j1] = a[k1];
   1628                 a[k1] = x;
   1629                 j1 += m;
   1630                 k1 += m;
   1631                 x = a[j1];
   1632                 a[j1] = a[k1];
   1633                 a[k1] = x;
   1634                 j1 -= nh;
   1635                 k1--;
   1636                 x = a[j1];
   1637                 a[j1] = a[k1];
   1638                 a[k1] = x;
   1639                 j1 -= m;
   1640                 k1 -= m;
   1641                 x = a[j1];
   1642                 a[j1] = a[k1];
   1643                 a[k1] = x;
   1644                 for (i = nh >> 1; i > (k ^= i); i >>= 1) {
   1645                     /* Avoid warning*/
   1646                 }
   1647             }
   1648             k1 = j0 + k0;
   1649             j1 = k1 + 1;
   1650             k1 += nh;
   1651             x = a[j1];
   1652             a[j1] = a[k1];
   1653             a[k1] = x;
   1654             j1 += m;
   1655             k1 += m;
   1656             x = a[j1];
   1657             a[j1] = a[k1];
   1658             a[k1] = x;
   1659         }
   1660     } else {
   1661         j0 = 0;
   1662         for (k0 = 2; k0 < m; k0 += 2) {
   1663             for (i = nh >> 1; i > (j0 ^= i); i >>= 1) {
   1664                 /* Avoid warning*/
   1665             }
   1666             k = k0;
   1667             for (j = j0; j < j0 + k0; j += 2) {
   1668                 x = a[j];
   1669                 a[j] = a[k];
   1670                 a[k] = x;
   1671                 j1 = j + nh;
   1672                 k1 = k + 1;
   1673                 x = a[j1];
   1674                 a[j1] = a[k1];
   1675                 a[k1] = x;
   1676                 j1++;
   1677                 k1 += nh;
   1678                 x = a[j1];
   1679                 a[j1] = a[k1];
   1680                 a[k1] = x;
   1681                 j1 -= nh;
   1682                 k1--;
   1683                 x = a[j1];
   1684                 a[j1] = a[k1];
   1685                 a[k1] = x;
   1686                 for (i = nh >> 1; i > (k ^= i); i >>= 1) {
   1687                     /* Avoid warning*/
   1688                 }
   1689             }
   1690             k1 = j0 + k0;
   1691             j1 = k1 + 1;
   1692             k1 += nh;
   1693             x = a[j1];
   1694             a[j1] = a[k1];
   1695             a[k1] = x;
   1696         }
   1697     }
   1698 }
   1699 
   1700 
   1701 /* **************************************************************************************************/
   1702 
   1703 /* **************************************************************************************************/
   1704 
   1705 void cftb1st(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
   1706 {
   1707     picoos_int32 i, i0, j, j0, j1, j2, j3, m, mh;
   1708     PICOFFTSG_FFTTYPE wk1r, wk1i, wk3r, wk3i,
   1709         wd1r, wd1i, wd3r, wd3i, ss1, ss3;
   1710     PICOFFTSG_FFTTYPE x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
   1711 
   1712     mh = n >> 3;
   1713     m = 2 * mh;
   1714     j1 = m;
   1715     j2 = j1 + m;
   1716     j3 = j2 + m;
   1717     x0r = a[0] + a[j2];
   1718     x0i = -a[1] - a[j2 + 1];
   1719     x1r = a[0] - a[j2];
   1720     x1i = -a[1] + a[j2 + 1];
   1721     x2r = a[j1] + a[j3];
   1722     x2i = a[j1 + 1] + a[j3 + 1];
   1723     x3r = a[j1] - a[j3];
   1724     x3i = a[j1 + 1] - a[j3 + 1];
   1725     a[0] = x0r + x2r;
   1726     a[1] = x0i - x2i;
   1727     a[j1] = x0r - x2r;
   1728     a[j1 + 1] = x0i + x2i;
   1729     a[j2] = x1r + x3i;
   1730     a[j2 + 1] = x1i + x3r;
   1731     a[j3] = x1r - x3i;
   1732     a[j3 + 1] = x1i - x3r;
   1733     wd1r = PICODSP_WGT_SHIFT;
   1734     wd1i = 0;
   1735     wd3r = PICODSP_WGT_SHIFT;
   1736     wd3i = 0;
   1737 
   1738     wk1r  = (PICOFFTSG_FFTTYPE) (0.998795449734  *PICODSP_WGT_SHIFT);
   1739     wk1i  = (PICOFFTSG_FFTTYPE) (0.049067676067  *PICODSP_WGT_SHIFT);
   1740     ss1   = (PICOFFTSG_FFTTYPE) (0.098135352135  *PICODSP_WGT_SHIFT);
   1741     wk3i  = (PICOFFTSG_FFTTYPE) (-0.146730467677 *PICODSP_WGT_SHIFT);
   1742     wk3r  = (PICOFFTSG_FFTTYPE) (0.989176511765  *PICODSP_WGT_SHIFT);
   1743     ss3   = (PICOFFTSG_FFTTYPE) (-0.293460935354 *PICODSP_WGT_SHIFT);
   1744 
   1745     i = 0;
   1746     for (;;) {
   1747         i0 = i + CDFT_LOOP_DIV_4;
   1748         if (i0 > mh - 4) {
   1749             i0 = mh - 4;
   1750         }
   1751         for (j = i + 2; j < i0; j += 4) {
   1752 
   1753             wd1r -= Mult_W_W(ss1, wk1i);
   1754             wd1i += Mult_W_W(ss1, wk1r);
   1755             wd3r -= Mult_W_W(ss3, wk3i);
   1756             wd3i += Mult_W_W(ss3, wk3r);
   1757 
   1758             j1 = j + m;
   1759             j2 = j1 + m;
   1760             j3 = j2 + m;
   1761             x0r = a[j] + a[j2];
   1762             x0i = -a[j + 1] - a[j2 + 1];
   1763             x1r = a[j] - a[j2];
   1764             x1i = -a[j + 1] + a[j2 + 1];
   1765             x2r = a[j1] + a[j3];
   1766             x2i = a[j1 + 1] + a[j3 + 1];
   1767             x3r = a[j1] - a[j3];
   1768             x3i = a[j1 + 1] - a[j3 + 1];
   1769             a[j] = x0r + x2r;
   1770             a[j + 1] = x0i - x2i;
   1771             a[j1] = x0r - x2r;
   1772             a[j1 + 1] = x0i + x2i;
   1773             x0r = x1r + x3i;
   1774             x0i = x1i + x3r;
   1775             a[j2] = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
   1776             a[j2 + 1] = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
   1777             x0r = x1r - x3i;
   1778             x0i = x1i - x3r;
   1779             a[j3] = Mult_W_W(wk3r, x0r) + Mult_W_W(wk3i, x0i);
   1780             a[j3 + 1] = Mult_W_W(wk3r, x0i) - Mult_W_W(wk3i, x0r);
   1781             x0r = a[j + 2] + a[j2 + 2];
   1782             x0i = -a[j + 3] - a[j2 + 3];
   1783             x1r = a[j + 2] - a[j2 + 2];
   1784             x1i = -a[j + 3] + a[j2 + 3];
   1785             x2r = a[j1 + 2] + a[j3 + 2];
   1786             x2i = a[j1 + 3] + a[j3 + 3];
   1787             x3r = a[j1 + 2] - a[j3 + 2];
   1788             x3i = a[j1 + 3] - a[j3 + 3];
   1789             a[j + 2] = x0r + x2r;
   1790             a[j + 3] = x0i - x2i;
   1791             a[j1 + 2] = x0r - x2r;
   1792             a[j1 + 3] = x0i + x2i;
   1793             x0r = x1r + x3i;
   1794             x0i = x1i + x3r;
   1795             a[j2 + 2] = Mult_W_W(wd1r, x0r) - Mult_W_W(wd1i, x0i);
   1796             a[j2 + 3] = Mult_W_W(wd1r, x0i) + Mult_W_W(wd1i, x0r);
   1797             x0r = x1r - x3i;
   1798             x0i = x1i - x3r;
   1799             a[j3 + 2] = Mult_W_W(wd3r, x0r) + Mult_W_W(wd3i, x0i);
   1800             a[j3 + 3] = Mult_W_W(wd3r, x0i) - Mult_W_W(wd3i, x0r);
   1801             j0 = m - j;
   1802             j1 = j0 + m;
   1803             j2 = j1 + m;
   1804             j3 = j2 + m;
   1805             x0r = a[j0] + a[j2];
   1806             x0i = -a[j0 + 1] - a[j2 + 1];
   1807             x1r = a[j0] - a[j2];
   1808             x1i = -a[j0 + 1] + a[j2 + 1];
   1809             x2r = a[j1] + a[j3];
   1810             x2i = a[j1 + 1] + a[j3 + 1];
   1811             x3r = a[j1] - a[j3];
   1812             x3i = a[j1 + 1] - a[j3 + 1];
   1813             a[j0] = x0r + x2r;
   1814             a[j0 + 1] = x0i - x2i;
   1815             a[j1] = x0r - x2r;
   1816             a[j1 + 1] = x0i + x2i;
   1817             x0r = x1r + x3i;
   1818             x0i = x1i + x3r;
   1819             a[j2] = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
   1820             a[j2 + 1] = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
   1821             x0r = x1r - x3i;
   1822             x0i = x1i - x3r;
   1823             a[j3] = Mult_W_W(wk3i, x0r) + Mult_W_W(wk3r, x0i);
   1824             a[j3 + 1] = Mult_W_W(wk3i, x0i) - Mult_W_W(wk3r, x0r);
   1825             x0r = a[j0 - 2] + a[j2 - 2];
   1826             x0i = -a[j0 - 1] - a[j2 - 1];
   1827             x1r = a[j0 - 2] - a[j2 - 2];
   1828             x1i = -a[j0 - 1] + a[j2 - 1];
   1829             x2r = a[j1 - 2] + a[j3 - 2];
   1830             x2i = a[j1 - 1] + a[j3 - 1];
   1831             x3r = a[j1 - 2] - a[j3 - 2];
   1832             x3i = a[j1 - 1] - a[j3 - 1];
   1833             a[j0 - 2] = x0r + x2r;
   1834             a[j0 - 1] = x0i - x2i;
   1835             a[j1 - 2] = x0r - x2r;
   1836             a[j1 - 1] = x0i + x2i;
   1837             x0r = x1r + x3i;
   1838             x0i = x1i + x3r;
   1839             a[j2 - 2] = Mult_W_W(wd1i, x0r) - Mult_W_W(wd1r, x0i);
   1840             a[j2 - 1] = Mult_W_W(wd1i, x0i) + Mult_W_W(wd1r, x0r);
   1841             x0r = x1r - x3i;
   1842             x0i = x1i - x3r;
   1843             a[j3 - 2] = Mult_W_W(wd3i, x0r) + Mult_W_W(wd3r, x0i);
   1844             a[j3 - 1] = Mult_W_W(wd3i, x0i) - Mult_W_W(wd3r, x0r);
   1845             wk1r -= Mult_W_W(ss1, wd1i);
   1846             wk1i += Mult_W_W(ss1, wd1r);
   1847             wk3r -= Mult_W_W(ss3, wd3i);
   1848             wk3i += Mult_W_W(ss3, wd3r);
   1849         }
   1850         if (i0 == mh - 4) {
   1851             break;
   1852         }
   1853     }
   1854     wd1r = WR5000;
   1855     j0 = mh;
   1856     j1 = j0 + m;
   1857     j2 = j1 + m;
   1858     j3 = j2 + m;
   1859     x0r = a[j0 - 2] + a[j2 - 2];
   1860     x0i = -a[j0 - 1] - a[j2 - 1];
   1861     x1r = a[j0 - 2] - a[j2 - 2];
   1862     x1i = -a[j0 - 1] + a[j2 - 1];
   1863     x2r = a[j1 - 2] + a[j3 - 2];
   1864     x2i = a[j1 - 1] + a[j3 - 1];
   1865     x3r = a[j1 - 2] - a[j3 - 2];
   1866     x3i = a[j1 - 1] - a[j3 - 1];
   1867     a[j0 - 2] = x0r + x2r;
   1868     a[j0 - 1] = x0i - x2i;
   1869     a[j1 - 2] = x0r - x2r;
   1870     a[j1 - 1] = x0i + x2i;
   1871     x0r = x1r + x3i;
   1872     x0i = x1i + x3r;
   1873     a[j2 - 2] = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
   1874     a[j2 - 1] = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
   1875     x0r = x1r - x3i;
   1876     x0i = x1i - x3r;
   1877     a[j3 - 2] = Mult_W_W(wk3r, x0r) + Mult_W_W(wk3i, x0i);
   1878     a[j3 - 1] = Mult_W_W(wk3r, x0i) - Mult_W_W(wk3i, x0r);
   1879     x0r = a[j0] + a[j2];
   1880     x0i = -a[j0 + 1] - a[j2 + 1];
   1881     x1r = a[j0] - a[j2];
   1882     x1i = -a[j0 + 1] + a[j2 + 1];
   1883     x2r = a[j1] + a[j3];
   1884     x2i = a[j1 + 1] + a[j3 + 1];
   1885     x3r = a[j1] - a[j3];
   1886     x3i = a[j1 + 1] - a[j3 + 1];
   1887     a[j0] = x0r + x2r;
   1888     a[j0 + 1] = x0i - x2i;
   1889     a[j1] = x0r - x2r;
   1890     a[j1 + 1] = x0i + x2i;
   1891     x0r = x1r + x3i;
   1892     x0i = x1i + x3r;
   1893     a[j2] = picofftsg_mult_w_a(wd1r, (x0r - x0i));
   1894     a[j2 + 1] = picofftsg_mult_w_a(wd1r, (x0i + x0r));
   1895     x0r = x1r - x3i;
   1896     x0i = x1i - x3r;
   1897     a[j3] = -picofftsg_mult_w_a(wd1r, (x0r + x0i));
   1898     a[j3 + 1] = -picofftsg_mult_w_a(wd1r, (x0i - x0r));
   1899     x0r = a[j0 + 2] + a[j2 + 2];
   1900     x0i = -a[j0 + 3] - a[j2 + 3];
   1901     x1r = a[j0 + 2] - a[j2 + 2];
   1902     x1i = -a[j0 + 3] + a[j2 + 3];
   1903     x2r = a[j1 + 2] + a[j3 + 2];
   1904     x2i = a[j1 + 3] + a[j3 + 3];
   1905     x3r = a[j1 + 2] - a[j3 + 2];
   1906     x3i = a[j1 + 3] - a[j3 + 3];
   1907     a[j0 + 2] = x0r + x2r;
   1908     a[j0 + 3] = x0i - x2i;
   1909     a[j1 + 2] = x0r - x2r;
   1910     a[j1 + 3] = x0i + x2i;
   1911     x0r = x1r + x3i;
   1912     x0i = x1i + x3r;
   1913     a[j2 + 2] = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
   1914     a[j2 + 3] = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
   1915     x0r = x1r - x3i;
   1916     x0i = x1i - x3r;
   1917     a[j3 + 2] = Mult_W_W(wk3i, x0r) + Mult_W_W(wk3r, x0i);
   1918     a[j3 + 3] = Mult_W_W(wk3i, x0i) - Mult_W_W(wk3r, x0r);
   1919 }
   1920 
   1921 void cftrec4(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
   1922 {
   1923     picoos_int32 isplt, j, k, m;
   1924 
   1925     m = n;
   1926     while (m > 512) {
   1927         m >>= 2;
   1928         cftmdl1(m, &a[n - m]);
   1929     }
   1930     cftleaf(m, 1, &a[n - m]);
   1931     k = 0;
   1932     for (j = n - m; j > 0; j -= m) {
   1933         k++;
   1934         isplt = cfttree(m, j, k, a);
   1935         cftleaf(m, isplt, &a[j - m]);
   1936     }
   1937 }
   1938 
   1939 
   1940 picoos_int32 cfttree(picoos_int32 n, picoos_int32 j, picoos_int32 k, PICOFFTSG_FFTTYPE *a)
   1941 {
   1942     picoos_int32 i, isplt, m;
   1943 
   1944     if ((k & 3) != 0) {
   1945         isplt = k & 1;
   1946         if (isplt != 0) {
   1947             cftmdl1(n, &a[j - n]);
   1948         } else {
   1949             cftmdl2(n, &a[j - n]);
   1950         }
   1951     } else {
   1952         m = n;
   1953         for (i = k; (i & 3) == 0; i >>= 2) {
   1954             m <<= 2;
   1955         }
   1956         isplt = i & 1;
   1957         if (isplt != 0) {
   1958             while (m > 128) {
   1959                 cftmdl1(m, &a[j - m]);
   1960                 m >>= 2;
   1961             }
   1962         } else {
   1963             while (m > 128) {
   1964                 cftmdl2(m, &a[j - m]);
   1965                 m >>= 2;
   1966             }
   1967         }
   1968     }
   1969     return isplt;
   1970 }
   1971 
   1972 
   1973 void cftleaf(picoos_int32 n, picoos_int32 isplt, PICOFFTSG_FFTTYPE *a)
   1974 {
   1975 
   1976     if (n == 512) {
   1977         cftmdl1(128, a);
   1978         cftf161(a);
   1979         cftf162(&a[32]);
   1980         cftf161(&a[64]);
   1981         cftf161(&a[96]);
   1982         cftmdl2(128, &a[128]);
   1983         cftf161(&a[128]);
   1984         cftf162(&a[160]);
   1985         cftf161(&a[192]);
   1986         cftf162(&a[224]);
   1987         cftmdl1(128, &a[256]);
   1988         cftf161(&a[256]);
   1989         cftf162(&a[288]);
   1990         cftf161(&a[320]);
   1991         cftf161(&a[352]);
   1992         if (isplt != 0) {
   1993             cftmdl1(128, &a[384]);
   1994             cftf161(&a[480]);
   1995         } else {
   1996             cftmdl2(128, &a[384]);
   1997             cftf162(&a[480]);
   1998         }
   1999         cftf161(&a[384]);
   2000         cftf162(&a[416]);
   2001         cftf161(&a[448]);
   2002     } else {
   2003         cftmdl1(64, a);
   2004         cftf081(a);
   2005         cftf082(&a[16]);
   2006         cftf081(&a[32]);
   2007         cftf081(&a[48]);
   2008         cftmdl2(64, &a[64]);
   2009         cftf081(&a[64]);
   2010         cftf082(&a[80]);
   2011         cftf081(&a[96]);
   2012         cftf082(&a[112]);
   2013         cftmdl1(64, &a[128]);
   2014         cftf081(&a[128]);
   2015         cftf082(&a[144]);
   2016         cftf081(&a[160]);
   2017         cftf081(&a[176]);
   2018         if (isplt != 0) {
   2019             cftmdl1(64, &a[192]);
   2020             cftf081(&a[240]);
   2021         } else {
   2022             cftmdl2(64, &a[192]);
   2023             cftf082(&a[240]);
   2024         }
   2025         cftf081(&a[192]);
   2026         cftf082(&a[208]);
   2027         cftf081(&a[224]);
   2028     }
   2029 }
   2030 
   2031 
   2032 void cftmdl1(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
   2033 {
   2034     picoos_int32 i, i0, j, j0, j1, j2, j3, m, mh;
   2035     PICOFFTSG_FFTTYPE wk1r, wk1i, wk3r, wk3i,
   2036         wd1r, wd1i, wd3r, wd3i, ss1, ss3;
   2037     PICOFFTSG_FFTTYPE x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
   2038 
   2039     mh = n >> 3;
   2040     m = 2 * mh;
   2041     j1 = m;
   2042     j2 = j1 + m;
   2043     j3 = j2 + m;
   2044     x0r = a[0] + a[j2];
   2045     x0i = a[1] + a[j2 + 1];
   2046     x1r = a[0] - a[j2];
   2047     x1i = a[1] - a[j2 + 1];
   2048     x2r = a[j1] + a[j3];
   2049     x2i = a[j1 + 1] + a[j3 + 1];
   2050     x3r = a[j1] - a[j3];
   2051     x3i = a[j1 + 1] - a[j3 + 1];
   2052     a[0] = x0r + x2r;
   2053     a[1] = x0i + x2i;
   2054     a[j1] = x0r - x2r;
   2055     a[j1 + 1] = x0i - x2i;
   2056     a[j2] = x1r - x3i;
   2057     a[j2 + 1] = x1i + x3r;
   2058     a[j3] = x1r + x3i;
   2059     a[j3 + 1] = x1i - x3r;
   2060     wd1r = (PICOFFTSG_FFTTYPE)(PICODSP_WGT_SHIFT);
   2061     wd1i = 0;
   2062     wd3r = (PICOFFTSG_FFTTYPE)(PICODSP_WGT_SHIFT);
   2063     wd3i = 0;
   2064     wk1r  =  (PICOFFTSG_FFTTYPE) (0.980785250664  *PICODSP_WGT_SHIFT);
   2065     wk1i  =  (PICOFFTSG_FFTTYPE) (0.195090323687  *PICODSP_WGT_SHIFT);
   2066     ss1   =  (PICOFFTSG_FFTTYPE) (0.390180647373  *PICODSP_WGT_SHIFT);
   2067     wk3i  =  (PICOFFTSG_FFTTYPE) (-0.555570185184 *PICODSP_WGT_SHIFT);
   2068     wk3r  =  (PICOFFTSG_FFTTYPE) (0.831469595432  *PICODSP_WGT_SHIFT);
   2069     ss3   =  (PICOFFTSG_FFTTYPE) (-1.111140370369 *PICODSP_WGT_SHIFT);
   2070 
   2071     i = 0;
   2072     for (;;) {
   2073         i0 = i + CDFT_LOOP_DIV_4;
   2074         if (i0 > mh - 4) {
   2075             i0 = mh - 4;
   2076         }
   2077         for (j = i + 2; j < i0; j += 4) {
   2078             wd1r -= Mult_W_W(ss1, wk1i);
   2079             wd1i += Mult_W_W(ss1, wk1r);
   2080             wd3r -= Mult_W_W(ss3, wk3i);
   2081             wd3i += Mult_W_W(ss3, wk3r);
   2082             j1 = j + m;
   2083             j2 = j1 + m;
   2084             j3 = j2 + m;
   2085             x0r = a[j] + a[j2];
   2086             x0i = a[j + 1] + a[j2 + 1];
   2087             x1r = a[j] - a[j2];
   2088             x1i = a[j + 1] - a[j2 + 1];
   2089             x2r = a[j1] + a[j3];
   2090             x2i = a[j1 + 1] + a[j3 + 1];
   2091             x3r = a[j1] - a[j3];
   2092             x3i = a[j1 + 1] - a[j3 + 1];
   2093             a[j] = x0r + x2r;
   2094             a[j + 1] = x0i + x2i;
   2095             a[j1] = x0r - x2r;
   2096             a[j1 + 1] = x0i - x2i;
   2097             x0r = x1r - x3i;
   2098             x0i = x1i + x3r;
   2099             a[j2] = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
   2100             a[j2 + 1] = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
   2101             x0r = x1r + x3i;
   2102             x0i = x1i - x3r;
   2103             a[j3] = Mult_W_W(wk3r, x0r) + Mult_W_W(wk3i, x0i);
   2104             a[j3 + 1] = Mult_W_W(wk3r, x0i) - Mult_W_W(wk3i, x0r);
   2105             x0r = a[j + 2] + a[j2 + 2];
   2106             x0i = a[j + 3] + a[j2 + 3];
   2107             x1r = a[j + 2] - a[j2 + 2];
   2108             x1i = a[j + 3] - a[j2 + 3];
   2109             x2r = a[j1 + 2] + a[j3 + 2];
   2110             x2i = a[j1 + 3] + a[j3 + 3];
   2111             x3r = a[j1 + 2] - a[j3 + 2];
   2112             x3i = a[j1 + 3] - a[j3 + 3];
   2113             a[j + 2] = x0r + x2r;
   2114             a[j + 3] = x0i + x2i;
   2115             a[j1 + 2] = x0r - x2r;
   2116             a[j1 + 3] = x0i - x2i;
   2117             x0r = x1r - x3i;
   2118             x0i = x1i + x3r;
   2119             a[j2 + 2] = Mult_W_W(wd1r, x0r) - Mult_W_W(wd1i, x0i);
   2120             a[j2 + 3] = Mult_W_W(wd1r, x0i) + Mult_W_W(wd1i, x0r);
   2121             x0r = x1r + x3i;
   2122             x0i = x1i - x3r;
   2123             a[j3 + 2] = Mult_W_W(wd3r, x0r) + Mult_W_W(wd3i, x0i);
   2124             a[j3 + 3] = Mult_W_W(wd3r, x0i) - Mult_W_W(wd3i, x0r);
   2125             j0 = m - j;
   2126             j1 = j0 + m;
   2127             j2 = j1 + m;
   2128             j3 = j2 + m;
   2129             x0r = a[j0] + a[j2];
   2130             x0i = a[j0 + 1] + a[j2 + 1];
   2131             x1r = a[j0] - a[j2];
   2132             x1i = a[j0 + 1] - a[j2 + 1];
   2133             x2r = a[j1] + a[j3];
   2134             x2i = a[j1 + 1] + a[j3 + 1];
   2135             x3r = a[j1] - a[j3];
   2136             x3i = a[j1 + 1] - a[j3 + 1];
   2137             a[j0] = x0r + x2r;
   2138             a[j0 + 1] = x0i + x2i;
   2139             a[j1] = x0r - x2r;
   2140             a[j1 + 1] = x0i - x2i;
   2141             x0r = x1r - x3i;
   2142             x0i = x1i + x3r;
   2143             a[j2] = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
   2144             a[j2 + 1] = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
   2145             x0r = x1r + x3i;
   2146             x0i = x1i - x3r;
   2147             a[j3] = Mult_W_W(wk3i, x0r) + Mult_W_W(wk3r, x0i);
   2148             a[j3 + 1] = Mult_W_W(wk3i, x0i) - Mult_W_W(wk3r, x0r);
   2149             x0r = a[j0 - 2] + a[j2 - 2];
   2150             x0i = a[j0 - 1] + a[j2 - 1];
   2151             x1r = a[j0 - 2] - a[j2 - 2];
   2152             x1i = a[j0 - 1] - a[j2 - 1];
   2153             x2r = a[j1 - 2] + a[j3 - 2];
   2154             x2i = a[j1 - 1] + a[j3 - 1];
   2155             x3r = a[j1 - 2] - a[j3 - 2];
   2156             x3i = a[j1 - 1] - a[j3 - 1];
   2157             a[j0 - 2] = x0r + x2r;
   2158             a[j0 - 1] = x0i + x2i;
   2159             a[j1 - 2] = x0r - x2r;
   2160             a[j1 - 1] = x0i - x2i;
   2161             x0r = x1r - x3i;
   2162             x0i = x1i + x3r;
   2163             a[j2 - 2] = Mult_W_W(wd1i, x0r) - Mult_W_W(wd1r, x0i);
   2164             a[j2 - 1] = Mult_W_W(wd1i, x0i) + Mult_W_W(wd1r, x0r);
   2165             x0r = x1r + x3i;
   2166             x0i = x1i - x3r;
   2167             a[j3 - 2] = Mult_W_W(wd3i, x0r) + Mult_W_W(wd3r, x0i);
   2168             a[j3 - 1] = Mult_W_W(wd3i, x0i) - Mult_W_W(wd3r, x0r);
   2169             wk1r -= Mult_W_W(ss1, wd1i);
   2170             wk1i += Mult_W_W(ss1, wd1r);
   2171             wk3r -= Mult_W_W(ss3, wd3i);
   2172             wk3i += Mult_W_W(ss3, wd3r);
   2173         }
   2174         if (i0 == mh - 4) {
   2175             break;
   2176         }
   2177     }
   2178     wd1r = WR5000;
   2179     j0 = mh;
   2180     j1 = j0 + m;
   2181     j2 = j1 + m;
   2182     j3 = j2 + m;
   2183     x0r = a[j0 - 2] + a[j2 - 2];
   2184     x0i = a[j0 - 1] + a[j2 - 1];
   2185     x1r = a[j0 - 2] - a[j2 - 2];
   2186     x1i = a[j0 - 1] - a[j2 - 1];
   2187     x2r = a[j1 - 2] + a[j3 - 2];
   2188     x2i = a[j1 - 1] + a[j3 - 1];
   2189     x3r = a[j1 - 2] - a[j3 - 2];
   2190     x3i = a[j1 - 1] - a[j3 - 1];
   2191     a[j0 - 2] = x0r + x2r;
   2192     a[j0 - 1] = x0i + x2i;
   2193     a[j1 - 2] = x0r - x2r;
   2194     a[j1 - 1] = x0i - x2i;
   2195     x0r = x1r - x3i;
   2196     x0i = x1i + x3r;
   2197     a[j2 - 2] = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
   2198     a[j2 - 1] = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
   2199     x0r = x1r + x3i;
   2200     x0i = x1i - x3r;
   2201     a[j3 - 2] = Mult_W_W(wk3r, x0r) + Mult_W_W(wk3i, x0i);
   2202     a[j3 - 1] = Mult_W_W(wk3r, x0i) - Mult_W_W(wk3i, x0r);
   2203     x0r = a[j0] + a[j2];
   2204     x0i = a[j0 + 1] + a[j2 + 1];
   2205     x1r = a[j0] - a[j2];
   2206     x1i = a[j0 + 1] - a[j2 + 1];
   2207     x2r = a[j1] + a[j3];
   2208     x2i = a[j1 + 1] + a[j3 + 1];
   2209     x3r = a[j1] - a[j3];
   2210     x3i = a[j1 + 1] - a[j3 + 1];
   2211     a[j0] = x0r + x2r;
   2212     a[j0 + 1] = x0i + x2i;
   2213     a[j1] = x0r - x2r;
   2214     a[j1 + 1] = x0i - x2i;
   2215     x0r = x1r - x3i;
   2216     x0i = x1i + x3r;
   2217     a[j2] = picofftsg_mult_w_a(wd1r, (x0r - x0i));
   2218     a[j2 + 1] = picofftsg_mult_w_a(wd1r, (x0i + x0r));
   2219     x0r = x1r + x3i;
   2220     x0i = x1i - x3r;
   2221     a[j3] = -picofftsg_mult_w_a(wd1r, (x0r + x0i));
   2222     a[j3 + 1] = -picofftsg_mult_w_a(wd1r, (x0i - x0r));
   2223     x0r = a[j0 + 2] + a[j2 + 2];
   2224     x0i = a[j0 + 3] + a[j2 + 3];
   2225     x1r = a[j0 + 2] - a[j2 + 2];
   2226     x1i = a[j0 + 3] - a[j2 + 3];
   2227     x2r = a[j1 + 2] + a[j3 + 2];
   2228     x2i = a[j1 + 3] + a[j3 + 3];
   2229     x3r = a[j1 + 2] - a[j3 + 2];
   2230     x3i = a[j1 + 3] - a[j3 + 3];
   2231     a[j0 + 2] = x0r + x2r;
   2232     a[j0 + 3] = x0i + x2i;
   2233     a[j1 + 2] = x0r - x2r;
   2234     a[j1 + 3] = x0i - x2i;
   2235     x0r = x1r - x3i;
   2236     x0i = x1i + x3r;
   2237     a[j2 + 2] = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
   2238     a[j2 + 3] = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
   2239     x0r = x1r + x3i;
   2240     x0i = x1i - x3r;
   2241     a[j3 + 2] = Mult_W_W(wk3i, x0r) + Mult_W_W(wk3r, x0i);
   2242     a[j3 + 3] = Mult_W_W(wk3i, x0i) - Mult_W_W(wk3r, x0r);
   2243 }
   2244 
   2245 
   2246 void cftmdl2(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
   2247 {
   2248     picoos_int32 i, i0, j, j0, j1, j2, j3, m, mh;
   2249     PICOFFTSG_FFTTYPE wn4r, wk1r, wk1i, wk3r, wk3i,
   2250         wl1r, wl1i, wl3r, wl3i, wd1r, wd1i, wd3r, wd3i,
   2251         we1r, we1i, we3r, we3i, ss1, ss3;
   2252     PICOFFTSG_FFTTYPE x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;
   2253 
   2254     mh = n >> 3;
   2255     m = 2 * mh;
   2256     wn4r = WR5000;
   2257     j1 = m;
   2258     j2 = j1 + m;
   2259     j3 = j2 + m;
   2260     x0r = a[0] - a[j2 + 1];
   2261     x0i = a[1] + a[j2];
   2262     x1r = a[0] + a[j2 + 1];
   2263     x1i = a[1] - a[j2];
   2264     x2r = a[j1] - a[j3 + 1];
   2265     x2i = a[j1 + 1] + a[j3];
   2266     x3r = a[j1] + a[j3 + 1];
   2267     x3i = a[j1 + 1] - a[j3];
   2268     y0r = picofftsg_mult_w_a(wn4r, (x2r - x2i));
   2269     y0i = picofftsg_mult_w_a(wn4r, (x2i + x2r));
   2270     a[0] = x0r + y0r;
   2271     a[1] = x0i + y0i;
   2272     a[j1] = x0r - y0r;
   2273     a[j1 + 1] = x0i - y0i;
   2274     y0r = picofftsg_mult_w_a(wn4r, (x3r - x3i));
   2275     y0i = picofftsg_mult_w_a(wn4r, (x3i + x3r));
   2276     a[j2] = x1r - y0i;
   2277     a[j2 + 1] = x1i + y0r;
   2278     a[j3] = x1r + y0i;
   2279     a[j3 + 1] = x1i - y0r;
   2280     wl1r = PICODSP_WGT_SHIFT;
   2281     wl1i = 0;
   2282     wl3r = PICODSP_WGT_SHIFT;
   2283     wl3i = 0;
   2284     we1r = wn4r;
   2285     we1i = wn4r;
   2286     we3r = -wn4r;
   2287     we3i = -wn4r;
   2288 
   2289     wk1r  =  (PICOFFTSG_FFTTYPE)(0.995184719563  *PICODSP_WGT_SHIFT);
   2290     wk1i  =  (PICOFFTSG_FFTTYPE)(0.098017141223  *PICODSP_WGT_SHIFT);
   2291     wd1r  =  (PICOFFTSG_FFTTYPE)(0.634393274784  *PICODSP_WGT_SHIFT);
   2292     wd1i  =  (PICOFFTSG_FFTTYPE)(0.773010432720  *PICODSP_WGT_SHIFT);
   2293     ss1   =  (PICOFFTSG_FFTTYPE)(0.196034282446  *PICODSP_WGT_SHIFT);
   2294     wk3i  =  (PICOFFTSG_FFTTYPE)(-0.290284663439 *PICODSP_WGT_SHIFT);
   2295     wk3r  =  (PICOFFTSG_FFTTYPE)(0.956940352917  *PICODSP_WGT_SHIFT);
   2296     ss3   =  (PICOFFTSG_FFTTYPE)(-0.580569326878 *PICODSP_WGT_SHIFT);
   2297     wd3r  =  (PICOFFTSG_FFTTYPE)(-0.881921231747 *PICODSP_WGT_SHIFT);
   2298     wd3i  =  (PICOFFTSG_FFTTYPE)(-0.471396744251 *PICODSP_WGT_SHIFT);
   2299 
   2300     i = 0;
   2301     for (;;) {
   2302         i0 = i + 4 * CDFT_LOOP_DIV;
   2303         if (i0 > mh - 4) {
   2304             i0 = mh - 4;
   2305         }
   2306         for (j = i + 2; j < i0; j += 4) {
   2307             wl1r -= Mult_W_W(ss1, wk1i);
   2308             wl1i += Mult_W_W(ss1, wk1r);
   2309             wl3r -= Mult_W_W(ss3, wk3i);
   2310             wl3i += Mult_W_W(ss3, wk3r);
   2311             we1r -= Mult_W_W(ss1, wd1i);
   2312             we1i += Mult_W_W(ss1, wd1r);
   2313             we3r -= Mult_W_W(ss3, wd3i);
   2314             we3i += Mult_W_W(ss3, wd3r);
   2315             j1 = j + m;
   2316             j2 = j1 + m;
   2317             j3 = j2 + m;
   2318             x0r = a[j] - a[j2 + 1];
   2319             x0i = a[j + 1] + a[j2];
   2320             x1r = a[j] + a[j2 + 1];
   2321             x1i = a[j + 1] - a[j2];
   2322             x2r = a[j1] - a[j3 + 1];
   2323             x2i = a[j1 + 1] + a[j3];
   2324             x3r = a[j1] + a[j3 + 1];
   2325             x3i = a[j1 + 1] - a[j3];
   2326             y0r = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
   2327             y0i = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
   2328             y2r = Mult_W_W(wd1r, x2r) - Mult_W_W(wd1i, x2i);
   2329             y2i = Mult_W_W(wd1r, x2i) + Mult_W_W(wd1i, x2r);
   2330             a[j] = y0r + y2r;
   2331             a[j + 1] = y0i + y2i;
   2332             a[j1] = y0r - y2r;
   2333             a[j1 + 1] = y0i - y2i;
   2334             y0r = Mult_W_W(wk3r, x1r) + Mult_W_W(wk3i, x1i);
   2335             y0i = Mult_W_W(wk3r, x1i) - Mult_W_W(wk3i, x1r);
   2336             y2r = Mult_W_W(wd3r, x3r) + Mult_W_W(wd3i, x3i);
   2337             y2i = Mult_W_W(wd3r, x3i) - Mult_W_W(wd3i, x3r);
   2338             a[j2] = y0r + y2r;
   2339             a[j2 + 1] = y0i + y2i;
   2340             a[j3] = y0r - y2r;
   2341             a[j3 + 1] = y0i - y2i;
   2342             x0r = a[j + 2] - a[j2 + 3];
   2343             x0i = a[j + 3] + a[j2 + 2];
   2344             x1r = a[j + 2] + a[j2 + 3];
   2345             x1i = a[j + 3] - a[j2 + 2];
   2346             x2r = a[j1 + 2] - a[j3 + 3];
   2347             x2i = a[j1 + 3] + a[j3 + 2];
   2348             x3r = a[j1 + 2] + a[j3 + 3];
   2349             x3i = a[j1 + 3] - a[j3 + 2];
   2350             y0r = Mult_W_W(wl1r, x0r) - Mult_W_W(wl1i, x0i);
   2351             y0i = Mult_W_W(wl1r, x0i) + Mult_W_W(wl1i, x0r);
   2352             y2r = Mult_W_W(we1r, x2r) - Mult_W_W(we1i, x2i);
   2353             y2i = Mult_W_W(we1r, x2i) + Mult_W_W(we1i, x2r);
   2354             a[j + 2] = y0r + y2r;
   2355             a[j + 3] = y0i + y2i;
   2356             a[j1 + 2] = y0r - y2r;
   2357             a[j1 + 3] = y0i - y2i;
   2358             y0r = Mult_W_W(wl3r, x1r) + Mult_W_W(wl3i, x1i);
   2359             y0i = Mult_W_W(wl3r, x1i) - Mult_W_W(wl3i, x1r);
   2360             y2r = Mult_W_W(we3r, x3r) + Mult_W_W(we3i, x3i);
   2361             y2i = Mult_W_W(we3r, x3i) - Mult_W_W(we3i, x3r);
   2362             a[j2 + 2] = y0r + y2r;
   2363             a[j2 + 3] = y0i + y2i;
   2364             a[j3 + 2] = y0r - y2r;
   2365             a[j3 + 3] = y0i - y2i;
   2366             j0 = m - j;
   2367             j1 = j0 + m;
   2368             j2 = j1 + m;
   2369             j3 = j2 + m;
   2370             x0r = a[j0] - a[j2 + 1];
   2371             x0i = a[j0 + 1] + a[j2];
   2372             x1r = a[j0] + a[j2 + 1];
   2373             x1i = a[j0 + 1] - a[j2];
   2374             x2r = a[j1] - a[j3 + 1];
   2375             x2i = a[j1 + 1] + a[j3];
   2376             x3r = a[j1] + a[j3 + 1];
   2377             x3i = a[j1 + 1] - a[j3];
   2378             y0r = Mult_W_W(wd1i, x0r) - Mult_W_W(wd1r, x0i);
   2379             y0i = Mult_W_W(wd1i, x0i) + Mult_W_W(wd1r, x0r);
   2380             y2r = Mult_W_W(wk1i, x2r) - Mult_W_W(wk1r, x2i);
   2381             y2i = Mult_W_W(wk1i, x2i) + Mult_W_W(wk1r, x2r);
   2382             a[j0] = y0r + y2r;
   2383             a[j0 + 1] = y0i + y2i;
   2384             a[j1] = y0r - y2r;
   2385             a[j1 + 1] = y0i - y2i;
   2386             y0r = Mult_W_W(wd3i, x1r) + Mult_W_W(wd3r, x1i);
   2387             y0i = Mult_W_W(wd3i, x1i) - Mult_W_W(wd3r, x1r);
   2388             y2r = Mult_W_W(wk3i, x3r) + Mult_W_W(wk3r, x3i);
   2389             y2i = Mult_W_W(wk3i, x3i) - Mult_W_W(wk3r, x3r);
   2390             a[j2] = y0r + y2r;
   2391             a[j2 + 1] = y0i + y2i;
   2392             a[j3] = y0r - y2r;
   2393             a[j3 + 1] = y0i - y2i;
   2394             x0r = a[j0 - 2] - a[j2 - 1];
   2395             x0i = a[j0 - 1] + a[j2 - 2];
   2396             x1r = a[j0 - 2] + a[j2 - 1];
   2397             x1i = a[j0 - 1] - a[j2 - 2];
   2398             x2r = a[j1 - 2] - a[j3 - 1];
   2399             x2i = a[j1 - 1] + a[j3 - 2];
   2400             x3r = a[j1 - 2] + a[j3 - 1];
   2401             x3i = a[j1 - 1] - a[j3 - 2];
   2402             y0r = Mult_W_W(we1i, x0r) - Mult_W_W(we1r, x0i);
   2403             y0i = Mult_W_W(we1i, x0i) + Mult_W_W(we1r, x0r);
   2404             y2r = Mult_W_W(wl1i, x2r) - Mult_W_W(wl1r, x2i);
   2405             y2i = Mult_W_W(wl1i, x2i) + Mult_W_W(wl1r, x2r);
   2406             a[j0 - 2] = y0r + y2r;
   2407             a[j0 - 1] = y0i + y2i;
   2408             a[j1 - 2] = y0r - y2r;
   2409             a[j1 - 1] = y0i - y2i;
   2410             y0r = Mult_W_W(we3i, x1r) + Mult_W_W(we3r, x1i);
   2411             y0i = Mult_W_W(we3i, x1i) - Mult_W_W(we3r, x1r);
   2412             y2r = Mult_W_W(wl3i, x3r) + Mult_W_W(wl3r, x3i);
   2413             y2i = Mult_W_W(wl3i, x3i) - Mult_W_W(wl3r, x3r);
   2414             a[j2 - 2] = y0r + y2r;
   2415             a[j2 - 1] = y0i + y2i;
   2416             a[j3 - 2] = y0r - y2r;
   2417             a[j3 - 1] = y0i - y2i;
   2418             wk1r -= Mult_W_W(ss1, wl1i);
   2419             wk1i += Mult_W_W(ss1, wl1r);
   2420             wk3r -= Mult_W_W(ss3, wl3i);
   2421             wk3i += Mult_W_W(ss3, wl3r);
   2422             wd1r -= Mult_W_W(ss1, we1i);
   2423             wd1i += Mult_W_W(ss1, we1r);
   2424             wd3r -= Mult_W_W(ss3, we3i);
   2425             wd3i += Mult_W_W(ss3, we3r);
   2426         }
   2427         if (i0 == mh - 4) {
   2428             break;
   2429         }
   2430     }
   2431     wl1r = WR2500;
   2432     wl1i = WI2500;
   2433     j0 = mh;
   2434     j1 = j0 + m;
   2435     j2 = j1 + m;
   2436     j3 = j2 + m;
   2437     x0r = a[j0 - 2] - a[j2 - 1];
   2438     x0i = a[j0 - 1] + a[j2 - 2];
   2439     x1r = a[j0 - 2] + a[j2 - 1];
   2440     x1i = a[j0 - 1] - a[j2 - 2];
   2441     x2r = a[j1 - 2] - a[j3 - 1];
   2442     x2i = a[j1 - 1] + a[j3 - 2];
   2443     x3r = a[j1 - 2] + a[j3 - 1];
   2444     x3i = a[j1 - 1] - a[j3 - 2];
   2445     y0r = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
   2446     y0i = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
   2447     y2r = Mult_W_W(wd1r, x2r) - Mult_W_W(wd1i, x2i);
   2448     y2i = Mult_W_W(wd1r, x2i) + Mult_W_W(wd1i, x2r);
   2449     a[j0 - 2] = y0r + y2r;
   2450     a[j0 - 1] = y0i + y2i;
   2451     a[j1 - 2] = y0r - y2r;
   2452     a[j1 - 1] = y0i - y2i;
   2453     y0r = Mult_W_W(wk3r, x1r) + Mult_W_W(wk3i, x1i);
   2454     y0i = Mult_W_W(wk3r, x1i) - Mult_W_W(wk3i, x1r);
   2455     y2r = Mult_W_W(wd3r, x3r) + Mult_W_W(wd3i, x3i);
   2456     y2i = Mult_W_W(wd3r, x3i) - Mult_W_W(wd3i, x3r);
   2457     a[j2 - 2] = y0r + y2r;
   2458     a[j2 - 1] = y0i + y2i;
   2459     a[j3 - 2] = y0r - y2r;
   2460     a[j3 - 1] = y0i - y2i;
   2461     x0r = a[j0] - a[j2 + 1];
   2462     x0i = a[j0 + 1] + a[j2];
   2463     x1r = a[j0] + a[j2 + 1];
   2464     x1i = a[j0 + 1] - a[j2];
   2465     x2r = a[j1] - a[j3 + 1];
   2466     x2i = a[j1 + 1] + a[j3];
   2467     x3r = a[j1] + a[j3 + 1];
   2468     x3i = a[j1 + 1] - a[j3];
   2469     y0r = Mult_W_W(wl1r, x0r) - Mult_W_W(wl1i, x0i);
   2470     y0i = Mult_W_W(wl1r, x0i) + Mult_W_W(wl1i, x0r);
   2471     y2r = Mult_W_W(wl1i, x2r) - Mult_W_W(wl1r, x2i);
   2472     y2i = Mult_W_W(wl1i, x2i) + Mult_W_W(wl1r, x2r);
   2473     a[j0] = y0r + y2r;
   2474     a[j0 + 1] = y0i + y2i;
   2475     a[j1] = y0r - y2r;
   2476     a[j1 + 1] = y0i - y2i;
   2477     y0r = Mult_W_W(wl1i, x1r) - Mult_W_W(wl1r, x1i);
   2478     y0i = Mult_W_W(wl1i, x1i) + Mult_W_W(wl1r, x1r);
   2479     y2r = Mult_W_W(wl1r, x3r) - Mult_W_W(wl1i, x3i);
   2480     y2i = Mult_W_W(wl1r, x3i) + Mult_W_W(wl1i, x3r);
   2481     a[j2] = y0r - y2r;
   2482     a[j2 + 1] = y0i - y2i;
   2483     a[j3] = y0r + y2r;
   2484     a[j3 + 1] = y0i + y2i;
   2485     x0r = a[j0 + 2] - a[j2 + 3];
   2486     x0i = a[j0 + 3] + a[j2 + 2];
   2487     x1r = a[j0 + 2] + a[j2 + 3];
   2488     x1i = a[j0 + 3] - a[j2 + 2];
   2489     x2r = a[j1 + 2] - a[j3 + 3];
   2490     x2i = a[j1 + 3] + a[j3 + 2];
   2491     x3r = a[j1 + 2] + a[j3 + 3];
   2492     x3i = a[j1 + 3] - a[j3 + 2];
   2493     y0r = Mult_W_W(wd1i, x0r) - Mult_W_W(wd1r, x0i);
   2494     y0i = Mult_W_W(wd1i, x0i) + Mult_W_W(wd1r, x0r);
   2495     y2r = Mult_W_W(wk1i, x2r) - Mult_W_W(wk1r, x2i);
   2496     y2i = Mult_W_W(wk1i, x2i) + Mult_W_W(wk1r, x2r);
   2497     a[j0 + 2] = y0r + y2r;
   2498     a[j0 + 3] = y0i + y2i;
   2499     a[j1 + 2] = y0r - y2r;
   2500     a[j1 + 3] = y0i - y2i;
   2501     y0r = Mult_W_W(wd3i, x1r) + Mult_W_W(wd3r, x1i);
   2502     y0i = Mult_W_W(wd3i, x1i) - Mult_W_W(wd3r, x1r);
   2503     y2r = Mult_W_W(wk3i, x3r) + Mult_W_W(wk3r, x3i);
   2504     y2i = Mult_W_W(wk3i, x3i) - Mult_W_W(wk3r, x3r);
   2505     a[j2 + 2] = y0r + y2r;
   2506     a[j2 + 3] = y0i + y2i;
   2507     a[j3 + 2] = y0r - y2r;
   2508     a[j3 + 3] = y0i - y2i;
   2509 }
   2510 
   2511 
   2512 void cftfx41(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
   2513 {
   2514 
   2515     if (n == 128) {
   2516         cftf161(a);
   2517         cftf162(&a[32]);
   2518         cftf161(&a[64]);
   2519         cftf161(&a[96]);
   2520     } else {
   2521         cftf081(a);
   2522         cftf082(&a[16]);
   2523         cftf081(&a[32]);
   2524         cftf081(&a[48]);
   2525     }
   2526 }
   2527 
   2528 
   2529 void cftf161(PICOFFTSG_FFTTYPE *a)
   2530 {
   2531     PICOFFTSG_FFTTYPE wn4r, wk1r, wk1i,
   2532         x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
   2533         y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
   2534         y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
   2535         y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
   2536         y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
   2537 
   2538     wn4r = WR5000;
   2539     wk1r = WR2500;
   2540     wk1i = WI2500;
   2541     x0r = a[0] + a[16];
   2542     x0i = a[1] + a[17];
   2543     x1r = a[0] - a[16];
   2544     x1i = a[1] - a[17];
   2545     x2r = a[8] + a[24];
   2546     x2i = a[9] + a[25];
   2547     x3r = a[8] - a[24];
   2548     x3i = a[9] - a[25];
   2549     y0r = x0r + x2r;
   2550     y0i = x0i + x2i;
   2551     y4r = x0r - x2r;
   2552     y4i = x0i - x2i;
   2553     y8r = x1r - x3i;
   2554     y8i = x1i + x3r;
   2555     y12r = x1r + x3i;
   2556     y12i = x1i - x3r;
   2557     x0r = a[2] + a[18];
   2558     x0i = a[3] + a[19];
   2559     x1r = a[2] - a[18];
   2560     x1i = a[3] - a[19];
   2561     x2r = a[10] + a[26];
   2562     x2i = a[11] + a[27];
   2563     x3r = a[10] - a[26];
   2564     x3i = a[11] - a[27];
   2565     y1r = x0r + x2r;
   2566     y1i = x0i + x2i;
   2567     y5r = x0r - x2r;
   2568     y5i = x0i - x2i;
   2569     x0r = x1r - x3i;
   2570     x0i = x1i + x3r;
   2571     y9r = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
   2572     y9i = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
   2573     x0r = x1r + x3i;
   2574     x0i = x1i - x3r;
   2575     y13r = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
   2576     y13i = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
   2577     x0r = a[4] + a[20];
   2578     x0i = a[5] + a[21];
   2579     x1r = a[4] - a[20];
   2580     x1i = a[5] - a[21];
   2581     x2r = a[12] + a[28];
   2582     x2i = a[13] + a[29];
   2583     x3r = a[12] - a[28];
   2584     x3i = a[13] - a[29];
   2585     y2r = x0r + x2r;
   2586     y2i = x0i + x2i;
   2587     y6r = x0r - x2r;
   2588     y6i = x0i - x2i;
   2589     x0r = x1r - x3i;
   2590     x0i = x1i + x3r;
   2591     y10r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
   2592     y10i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
   2593     x0r = x1r + x3i;
   2594     x0i = x1i - x3r;
   2595     y14r = picofftsg_mult_w_a(wn4r, (x0r + x0i));
   2596     y14i = picofftsg_mult_w_a(wn4r, (x0i - x0r));
   2597     x0r = a[6] + a[22];
   2598     x0i = a[7] + a[23];
   2599     x1r = a[6] - a[22];
   2600     x1i = a[7] - a[23];
   2601     x2r = a[14] + a[30];
   2602     x2i = a[15] + a[31];
   2603     x3r = a[14] - a[30];
   2604     x3i = a[15] - a[31];
   2605     y3r = x0r + x2r;
   2606     y3i = x0i + x2i;
   2607     y7r = x0r - x2r;
   2608     y7i = x0i - x2i;
   2609     x0r = x1r - x3i;
   2610     x0i = x1i + x3r;
   2611     y11r = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
   2612     y11i = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
   2613     x0r = x1r + x3i;
   2614     x0i = x1i - x3r;
   2615     y15r = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
   2616     y15i = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
   2617     x0r = y12r - y14r;
   2618     x0i = y12i - y14i;
   2619     x1r = y12r + y14r;
   2620     x1i = y12i + y14i;
   2621     x2r = y13r - y15r;
   2622     x2i = y13i - y15i;
   2623     x3r = y13r + y15r;
   2624     x3i = y13i + y15i;
   2625     a[24] = x0r + x2r;
   2626     a[25] = x0i + x2i;
   2627     a[26] = x0r - x2r;
   2628     a[27] = x0i - x2i;
   2629     a[28] = x1r - x3i;
   2630     a[29] = x1i + x3r;
   2631     a[30] = x1r + x3i;
   2632     a[31] = x1i - x3r;
   2633     x0r = y8r + y10r;
   2634     x0i = y8i + y10i;
   2635     x1r = y8r - y10r;
   2636     x1i = y8i - y10i;
   2637     x2r = y9r + y11r;
   2638     x2i = y9i + y11i;
   2639     x3r = y9r - y11r;
   2640     x3i = y9i - y11i;
   2641     a[16] = x0r + x2r;
   2642     a[17] = x0i + x2i;
   2643     a[18] = x0r - x2r;
   2644     a[19] = x0i - x2i;
   2645     a[20] = x1r - x3i;
   2646     a[21] = x1i + x3r;
   2647     a[22] = x1r + x3i;
   2648     a[23] = x1i - x3r;
   2649     x0r = y5r - y7i;
   2650     x0i = y5i + y7r;
   2651     x2r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
   2652     x2i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
   2653     x0r = y5r + y7i;
   2654     x0i = y5i - y7r;
   2655     x3r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
   2656     x3i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
   2657     x0r = y4r - y6i;
   2658     x0i = y4i + y6r;
   2659     x1r = y4r + y6i;
   2660     x1i = y4i - y6r;
   2661     a[8] = x0r + x2r;
   2662     a[9] = x0i + x2i;
   2663     a[10] = x0r - x2r;
   2664     a[11] = x0i - x2i;
   2665     a[12] = x1r - x3i;
   2666     a[13] = x1i + x3r;
   2667     a[14] = x1r + x3i;
   2668     a[15] = x1i - x3r;
   2669     x0r = y0r + y2r;
   2670     x0i = y0i + y2i;
   2671     x1r = y0r - y2r;
   2672     x1i = y0i - y2i;
   2673     x2r = y1r + y3r;
   2674     x2i = y1i + y3i;
   2675     x3r = y1r - y3r;
   2676     x3i = y1i - y3i;
   2677     a[0] = x0r + x2r;
   2678     a[1] = x0i + x2i;
   2679     a[2] = x0r - x2r;
   2680     a[3] = x0i - x2i;
   2681     a[4] = x1r - x3i;
   2682     a[5] = x1i + x3r;
   2683     a[6] = x1r + x3i;
   2684     a[7] = x1i - x3r;
   2685 }
   2686 
   2687 
   2688 void cftf162(PICOFFTSG_FFTTYPE *a)
   2689 {
   2690     PICOFFTSG_FFTTYPE wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i,
   2691         x0r, x0i, x1r, x1i, x2r, x2i,
   2692         y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
   2693         y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
   2694         y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
   2695         y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
   2696 
   2697     wn4r = WR5000;
   2698     wk1r = WR1250;
   2699     wk1i = WI1250;
   2700     wk2r = WR2500;
   2701     wk2i = WI2500;
   2702     wk3r = WR3750;
   2703     wk3i = WI3750;
   2704     x1r = a[0] - a[17];
   2705     x1i = a[1] + a[16];
   2706     x0r = a[8] - a[25];
   2707     x0i = a[9] + a[24];
   2708     x2r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
   2709     x2i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
   2710     y0r = x1r + x2r;
   2711     y0i = x1i + x2i;
   2712     y4r = x1r - x2r;
   2713     y4i = x1i - x2i;
   2714     x1r = a[0] + a[17];
   2715     x1i = a[1] - a[16];
   2716     x0r = a[8] + a[25];
   2717     x0i = a[9] - a[24];
   2718     x2r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
   2719     x2i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
   2720     y8r = x1r - x2i;
   2721     y8i = x1i + x2r;
   2722     y12r = x1r + x2i;
   2723     y12i = x1i - x2r;
   2724     x0r = a[2] - a[19];
   2725     x0i = a[3] + a[18];
   2726     x1r = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
   2727     x1i = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
   2728     x0r = a[10] - a[27];
   2729     x0i = a[11] + a[26];
   2730     x2r = Mult_W_W(wk3i, x0r) - Mult_W_W(wk3r, x0i);
   2731     x2i = Mult_W_W(wk3i, x0i) + Mult_W_W(wk3r, x0r);
   2732     y1r = x1r + x2r;
   2733     y1i = x1i + x2i;
   2734     y5r = x1r - x2r;
   2735     y5i = x1i - x2i;
   2736     x0r = a[2] + a[19];
   2737     x0i = a[3] - a[18];
   2738     x1r = Mult_W_W(wk3r, x0r) - Mult_W_W(wk3i, x0i);
   2739     x1i = Mult_W_W(wk3r, x0i) + Mult_W_W(wk3i, x0r);
   2740     x0r = a[10] + a[27];
   2741     x0i = a[11] - a[26];
   2742     x2r = Mult_W_W(wk1r, x0r) + Mult_W_W(wk1i, x0i);
   2743     x2i = Mult_W_W(wk1r, x0i) - Mult_W_W(wk1i, x0r);
   2744     y9r = x1r - x2r;
   2745     y9i = x1i - x2i;
   2746     y13r = x1r + x2r;
   2747     y13i = x1i + x2i;
   2748     x0r = a[4] - a[21];
   2749     x0i = a[5] + a[20];
   2750     x1r = Mult_W_W(wk2r, x0r) - Mult_W_W(wk2i, x0i);
   2751     x1i = Mult_W_W(wk2r, x0i) + Mult_W_W(wk2i, x0r);
   2752     x0r = a[12] - a[29];
   2753     x0i = a[13] + a[28];
   2754     x2r = Mult_W_W(wk2i, x0r) - Mult_W_W(wk2r, x0i);
   2755     x2i = Mult_W_W(wk2i, x0i) + Mult_W_W(wk2r, x0r);
   2756     y2r = x1r + x2r;
   2757     y2i = x1i + x2i;
   2758     y6r = x1r - x2r;
   2759     y6i = x1i - x2i;
   2760     x0r = a[4] + a[21];
   2761     x0i = a[5] - a[20];
   2762     x1r = Mult_W_W(wk2i, x0r) - Mult_W_W(wk2r, x0i);
   2763     x1i = Mult_W_W(wk2i, x0i) + Mult_W_W(wk2r, x0r);
   2764     x0r = a[12] + a[29];
   2765     x0i = a[13] - a[28];
   2766     x2r = Mult_W_W(wk2r, x0r) - Mult_W_W(wk2i, x0i);
   2767     x2i = Mult_W_W(wk2r, x0i) + Mult_W_W(wk2i, x0r);
   2768     y10r = x1r - x2r;
   2769     y10i = x1i - x2i;
   2770     y14r = x1r + x2r;
   2771     y14i = x1i + x2i;
   2772     x0r = a[6] - a[23];
   2773     x0i = a[7] + a[22];
   2774     x1r = Mult_W_W(wk3r, x0r) - Mult_W_W(wk3i, x0i);
   2775     x1i = Mult_W_W(wk3r, x0i) + Mult_W_W(wk3i, x0r);
   2776     x0r = a[14] - a[31];
   2777     x0i = a[15] + a[30];
   2778     x2r = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
   2779     x2i = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
   2780     y3r = x1r + x2r;
   2781     y3i = x1i + x2i;
   2782     y7r = x1r - x2r;
   2783     y7i = x1i - x2i;
   2784     x0r = a[6] + a[23];
   2785     x0i = a[7] - a[22];
   2786     x1r = Mult_W_W(wk1i, x0r) + Mult_W_W(wk1r, x0i);
   2787     x1i = Mult_W_W(wk1i, x0i) - Mult_W_W(wk1r, x0r);
   2788     x0r = a[14] + a[31];
   2789     x0i = a[15] - a[30];
   2790     x2r = Mult_W_W(wk3i, x0r) - Mult_W_W(wk3r, x0i);
   2791     x2i = Mult_W_W(wk3i, x0i) + Mult_W_W(wk3r, x0r);
   2792     y11r = x1r + x2r;
   2793     y11i = x1i + x2i;
   2794     y15r = x1r - x2r;
   2795     y15i = x1i - x2i;
   2796     x1r = y0r + y2r;
   2797     x1i = y0i + y2i;
   2798     x2r = y1r + y3r;
   2799     x2i = y1i + y3i;
   2800     a[0] = x1r + x2r;
   2801     a[1] = x1i + x2i;
   2802     a[2] = x1r - x2r;
   2803     a[3] = x1i - x2i;
   2804     x1r = y0r - y2r;
   2805     x1i = y0i - y2i;
   2806     x2r = y1r - y3r;
   2807     x2i = y1i - y3i;
   2808     a[4] = x1r - x2i;
   2809     a[5] = x1i + x2r;
   2810     a[6] = x1r + x2i;
   2811     a[7] = x1i - x2r;
   2812     x1r = y4r - y6i;
   2813     x1i = y4i + y6r;
   2814     x0r = y5r - y7i;
   2815     x0i = y5i + y7r;
   2816     x2r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
   2817     x2i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
   2818     a[8] = x1r + x2r;
   2819     a[9] = x1i + x2i;
   2820     a[10] = x1r - x2r;
   2821     a[11] = x1i - x2i;
   2822     x1r = y4r + y6i;
   2823     x1i = y4i - y6r;
   2824     x0r = y5r + y7i;
   2825     x0i = y5i - y7r;
   2826     x2r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
   2827     x2i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
   2828     a[12] = x1r - x2i;
   2829     a[13] = x1i + x2r;
   2830     a[14] = x1r + x2i;
   2831     a[15] = x1i - x2r;
   2832     x1r = y8r + y10r;
   2833     x1i = y8i + y10i;
   2834     x2r = y9r - y11r;
   2835     x2i = y9i - y11i;
   2836     a[16] = x1r + x2r;
   2837     a[17] = x1i + x2i;
   2838     a[18] = x1r - x2r;
   2839     a[19] = x1i - x2i;
   2840     x1r = y8r - y10r;
   2841     x1i = y8i - y10i;
   2842     x2r = y9r + y11r;
   2843     x2i = y9i + y11i;
   2844     a[20] = x1r - x2i;
   2845     a[21] = x1i + x2r;
   2846     a[22] = x1r + x2i;
   2847     a[23] = x1i - x2r;
   2848     x1r = y12r - y14i;
   2849     x1i = y12i + y14r;
   2850     x0r = y13r + y15i;
   2851     x0i = y13i - y15r;
   2852     x2r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
   2853     x2i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
   2854     a[24] = x1r + x2r;
   2855     a[25] = x1i + x2i;
   2856     a[26] = x1r - x2r;
   2857     a[27] = x1i - x2i;
   2858     x1r = y12r + y14i;
   2859     x1i = y12i - y14r;
   2860     x0r = y13r - y15i;
   2861     x0i = y13i + y15r;
   2862     x2r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
   2863     x2i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
   2864     a[28] = x1r - x2i;
   2865     a[29] = x1i + x2r;
   2866     a[30] = x1r + x2i;
   2867     a[31] = x1i - x2r;
   2868 }
   2869 
   2870 
   2871 void cftf081(PICOFFTSG_FFTTYPE *a)
   2872 {
   2873     PICOFFTSG_FFTTYPE wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
   2874         y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
   2875         y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
   2876 
   2877     wn4r = WR5000;
   2878     x0r = a[0] + a[8];
   2879     x0i = a[1] + a[9];
   2880     x1r = a[0] - a[8];
   2881     x1i = a[1] - a[9];
   2882     x2r = a[4] + a[12];
   2883     x2i = a[5] + a[13];
   2884     x3r = a[4] - a[12];
   2885     x3i = a[5] - a[13];
   2886     y0r = x0r + x2r;
   2887     y0i = x0i + x2i;
   2888     y2r = x0r - x2r;
   2889     y2i = x0i - x2i;
   2890     y1r = x1r - x3i;
   2891     y1i = x1i + x3r;
   2892     y3r = x1r + x3i;
   2893     y3i = x1i - x3r;
   2894     x0r = a[2] + a[10];
   2895     x0i = a[3] + a[11];
   2896     x1r = a[2] - a[10];
   2897     x1i = a[3] - a[11];
   2898     x2r = a[6] + a[14];
   2899     x2i = a[7] + a[15];
   2900     x3r = a[6] - a[14];
   2901     x3i = a[7] - a[15];
   2902     y4r = x0r + x2r;
   2903     y4i = x0i + x2i;
   2904     y6r = x0r - x2r;
   2905     y6i = x0i - x2i;
   2906     x0r = x1r - x3i;
   2907     x0i = x1i + x3r;
   2908     x2r = x1r + x3i;
   2909     x2i = x1i - x3r;
   2910     y5r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
   2911     y5i = picofftsg_mult_w_a(wn4r, (x0r + x0i));
   2912     y7r = picofftsg_mult_w_a(wn4r, (x2r - x2i));
   2913     y7i = picofftsg_mult_w_a(wn4r, (x2r + x2i));
   2914     a[8] = y1r + y5r;
   2915     a[9] = y1i + y5i;
   2916     a[10] = y1r - y5r;
   2917     a[11] = y1i - y5i;
   2918     a[12] = y3r - y7i;
   2919     a[13] = y3i + y7r;
   2920     a[14] = y3r + y7i;
   2921     a[15] = y3i - y7r;
   2922     a[0] = y0r + y4r;
   2923     a[1] = y0i + y4i;
   2924     a[2] = y0r - y4r;
   2925     a[3] = y0i - y4i;
   2926     a[4] = y2r - y6i;
   2927     a[5] = y2i + y6r;
   2928     a[6] = y2r + y6i;
   2929     a[7] = y2i - y6r;
   2930 }
   2931 
   2932 
   2933 void cftf082(PICOFFTSG_FFTTYPE *a)
   2934 {
   2935     PICOFFTSG_FFTTYPE wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i,
   2936         y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
   2937         y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
   2938 
   2939     wn4r = WR5000;
   2940     wk1r = WR2500;
   2941     wk1i = WI2500;
   2942     y0r = a[0] - a[9];
   2943     y0i = a[1] + a[8];
   2944     y1r = a[0] + a[9];
   2945     y1i = a[1] - a[8];
   2946     x0r = a[4] - a[13];
   2947     x0i = a[5] + a[12];
   2948     y2r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
   2949     y2i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
   2950     x0r = a[4] + a[13];
   2951     x0i = a[5] - a[12];
   2952     y3r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
   2953     y3i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
   2954     x0r = a[2] - a[11];
   2955     x0i = a[3] + a[10];
   2956     y4r = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
   2957     y4i = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
   2958     x0r = a[2] + a[11];
   2959     x0i = a[3] - a[10];
   2960     y5r = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
   2961     y5i = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
   2962     x0r = a[6] - a[15];
   2963     x0i = a[7] + a[14];
   2964     y6r = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
   2965     y6i = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
   2966     x0r = a[6] + a[15];
   2967     x0i = a[7] - a[14];
   2968     y7r = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
   2969     y7i = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
   2970     x0r = y0r + y2r;
   2971     x0i = y0i + y2i;
   2972     x1r = y4r + y6r;
   2973     x1i = y4i + y6i;
   2974     a[0] = x0r + x1r;
   2975     a[1] = x0i + x1i;
   2976     a[2] = x0r - x1r;
   2977     a[3] = x0i - x1i;
   2978     x0r = y0r - y2r;
   2979     x0i = y0i - y2i;
   2980     x1r = y4r - y6r;
   2981     x1i = y4i - y6i;
   2982     a[4] = x0r - x1i;
   2983     a[5] = x0i + x1r;
   2984     a[6] = x0r + x1i;
   2985     a[7] = x0i - x1r;
   2986     x0r = y1r - y3i;
   2987     x0i = y1i + y3r;
   2988     x1r = y5r - y7r;
   2989     x1i = y5i - y7i;
   2990     a[8] = x0r + x1r;
   2991     a[9] = x0i + x1i;
   2992     a[10] = x0r - x1r;
   2993     a[11] = x0i - x1i;
   2994     x0r = y1r + y3i;
   2995     x0i = y1i - y3r;
   2996     x1r = y5r + y7r;
   2997     x1i = y5i + y7i;
   2998     a[12] = x0r - x1i;
   2999     a[13] = x0i + x1r;
   3000     a[14] = x0r + x1i;
   3001     a[15] = x0i - x1r;
   3002 }
   3003 
   3004 
   3005 void cftf040(PICOFFTSG_FFTTYPE *a)
   3006 {
   3007     PICOFFTSG_FFTTYPE x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
   3008 
   3009     x0r = a[0] + a[4];
   3010     x0i = a[1] + a[5];
   3011     x1r = a[0] - a[4];
   3012     x1i = a[1] - a[5];
   3013     x2r = a[2] + a[6];
   3014     x2i = a[3] + a[7];
   3015     x3r = a[2] - a[6];
   3016     x3i = a[3] - a[7];
   3017     a[0] = x0r + x2r;
   3018     a[1] = x0i + x2i;
   3019     a[2] = x1r - x3i;
   3020     a[3] = x1i + x3r;
   3021     a[4] = x0r - x2r;
   3022     a[5] = x0i - x2i;
   3023     a[6] = x1r + x3i;
   3024     a[7] = x1i - x3r;
   3025 }
   3026 
   3027 
   3028 void cftb040(PICOFFTSG_FFTTYPE *a)
   3029 {
   3030     PICOFFTSG_FFTTYPE x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
   3031 
   3032     x0r = a[0] + a[4];
   3033     x0i = a[1] + a[5];
   3034     x1r = a[0] - a[4];
   3035     x1i = a[1] - a[5];
   3036     x2r = a[2] + a[6];
   3037     x2i = a[3] + a[7];
   3038     x3r = a[2] - a[6];
   3039     x3i = a[3] - a[7];
   3040     a[0] = x0r + x2r;
   3041     a[1] = x0i + x2i;
   3042     a[2] = x1r + x3i;
   3043     a[3] = x1i - x3r;
   3044     a[4] = x0r - x2r;
   3045     a[5] = x0i - x2i;
   3046     a[6] = x1r - x3i;
   3047     a[7] = x1i + x3r;
   3048 }
   3049 
   3050 
   3051 void cftx020(PICOFFTSG_FFTTYPE *a)
   3052 {
   3053     PICOFFTSG_FFTTYPE x0r, x0i;
   3054 
   3055     x0r = a[0] - a[2];
   3056     x0i = a[1] - a[3];
   3057     a[0] += a[2];
   3058     a[1] += a[3];
   3059     a[2] = x0r;
   3060     a[3] = x0i;
   3061 }
   3062 
   3063 
   3064 void rftfsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
   3065 {
   3066     picoos_int32 i, i0, j, k;
   3067     PICOFFTSG_FFTTYPE w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
   3068 
   3069     wkr = 0;
   3070     wki = 0;
   3071 
   3072     switch (n) {
   3073         case 8 :
   3074             wdi=(PICOFFTSG_FFTTYPE)(0.353553414345*PICODSP_WGT_SHIFT); wdr=(PICOFFTSG_FFTTYPE)(0.146446630359*PICODSP_WGT_SHIFT);
   3075             w1r=(PICOFFTSG_FFTTYPE)(0.707106709480*PICODSP_WGT_SHIFT); w1i=(PICOFFTSG_FFTTYPE)(0.707106828690*PICODSP_WGT_SHIFT);
   3076             ss =(PICOFFTSG_FFTTYPE)(1.414213657379*PICODSP_WGT_SHIFT); break;
   3077         case 16 :
   3078             wdi=(PICOFFTSG_FFTTYPE)(0.191341713071*PICODSP_WGT_SHIFT); wdr=(PICOFFTSG_FFTTYPE)(0.038060232997*PICODSP_WGT_SHIFT);
   3079             w1r=(PICOFFTSG_FFTTYPE)(0.923879504204*PICODSP_WGT_SHIFT); w1i=(PICOFFTSG_FFTTYPE)(0.382683426142*PICODSP_WGT_SHIFT);
   3080             ss =(PICOFFTSG_FFTTYPE)(0.765366852283*PICODSP_WGT_SHIFT); break;
   3081         case 32 :
   3082             wdi=(PICOFFTSG_FFTTYPE)(0.097545161843*PICODSP_WGT_SHIFT); wdr=(PICOFFTSG_FFTTYPE)(0.009607359767*PICODSP_WGT_SHIFT);
   3083             w1r=(PICOFFTSG_FFTTYPE)(0.980785250664*PICODSP_WGT_SHIFT); w1i=(PICOFFTSG_FFTTYPE)(0.195090323687*PICODSP_WGT_SHIFT);
   3084             ss =(PICOFFTSG_FFTTYPE)(0.390180647373*PICODSP_WGT_SHIFT); break;
   3085         case 64 :
   3086             wdi=(PICOFFTSG_FFTTYPE)(0.049008570611*PICODSP_WGT_SHIFT); wdr=(PICOFFTSG_FFTTYPE)(0.002407636726*PICODSP_WGT_SHIFT);
   3087             w1r=(PICOFFTSG_FFTTYPE)(0.995184719563*PICODSP_WGT_SHIFT); w1i=(PICOFFTSG_FFTTYPE)(0.098017141223*PICODSP_WGT_SHIFT);
   3088             ss =(PICOFFTSG_FFTTYPE)(0.196034282446*PICODSP_WGT_SHIFT); break;
   3089         default :
   3090             wdr = 0; wdi = 0; ss = 0;
   3091             break;
   3092     }
   3093 
   3094     i = n >> 1;
   3095     for (;;) {
   3096         i0 = i - RDFT_LOOP_DIV_4;
   3097         if (i0 < 4) {
   3098             i0 = 4;
   3099         }
   3100         for (j = i - 4; j >= i0; j -= 4) {
   3101             k = n - j;
   3102             xr = a[j + 2] - a[k - 2];
   3103             xi = a[j + 3] + a[k - 1];
   3104             yr = Mult_W_W(wdr, xr) - Mult_W_W(wdi, xi);
   3105             yi = Mult_W_W(wdr, xi) + Mult_W_W(wdi, xr);
   3106             a[j + 2] -= yr;
   3107             a[j + 3] -= yi;
   3108             a[k - 2] += yr;
   3109             a[k - 1] -= yi;
   3110             wkr += Mult_W_W(ss, wdi);
   3111             wki += picofftsg_mult_w_w(ss, (PICOFFTSG_WGT_SHIFT2 - wdr));
   3112             xr = a[j] - a[k];
   3113             xi = a[j + 1] + a[k + 1];
   3114             yr = Mult_W_W(wkr, xr) - Mult_W_W(wki, xi);
   3115             yi = Mult_W_W(wkr, xi) + Mult_W_W(wki, xr);
   3116             a[j] -= yr;
   3117             a[j + 1] -= yi;
   3118             a[k] += yr;
   3119             a[k + 1] -= yi;
   3120             wdr += Mult_W_W(ss, wki);
   3121             wdi += picofftsg_mult_w_w(ss, (PICOFFTSG_WGT_SHIFT2 - wkr));
   3122         }
   3123         if (i0 == 4) {
   3124             break;
   3125         }
   3126     }
   3127 
   3128     xr = a[2] - a[n - 2];
   3129     xi = a[3] + a[n - 1];
   3130     yr = Mult_W_W(wdr, xr) - Mult_W_W(wdi, xi);
   3131     yi = Mult_W_W(wdr, xi) + Mult_W_W(wdi, xr);
   3132 
   3133     a[2] -= yr;
   3134     a[3] -= yi;
   3135     a[n - 2] += yr;
   3136     a[n - 1] -= yi;
   3137 
   3138 
   3139 }
   3140 
   3141 
   3142 void rftbsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
   3143 {
   3144     picoos_int32 i, i0, j, k;
   3145     PICOFFTSG_FFTTYPE w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
   3146     wkr = 0;
   3147     wki = 0;
   3148     wdi=(PICOFFTSG_FFTTYPE)(0.012270614505*PICODSP_WGT_SHIFT);
   3149     wdr=(PICOFFTSG_FFTTYPE)(0.000150590655*PICODSP_WGT_SHIFT);
   3150     w1r=(PICOFFTSG_FFTTYPE)(0.999698817730*PICODSP_WGT_SHIFT);
   3151     w1i=(PICOFFTSG_FFTTYPE)(0.024541229010*PICODSP_WGT_SHIFT);
   3152     ss=(PICOFFTSG_FFTTYPE)(0.049082458019*PICODSP_WGT_SHIFT);
   3153 
   3154     i = n >> 1;
   3155     for (;;) {
   3156         i0 = i - RDFT_LOOP_DIV4;
   3157         if (i0 < 4) {
   3158             i0 = 4;
   3159         }
   3160         for (j = i - 4; j >= i0; j -= 4) {
   3161             k = n - j;
   3162             xr = a[j + 2] - a[k - 2];
   3163             xi = a[j + 3] + a[k - 1];
   3164             yr = Mult_W_W(wdr, xr) + Mult_W_W(wdi, xi);
   3165             yi = Mult_W_W(wdr, xi) - Mult_W_W(wdi, xr);
   3166             a[j + 2] -= yr;
   3167             a[j + 3] -= yi;
   3168             a[k - 2] += yr;
   3169             a[k - 1] -= yi;
   3170             wkr += Mult_W_W(ss, wdi);
   3171             wki += picofftsg_mult_w_w(ss, (PICOFFTSG_WGT_SHIFT2 - wdr));
   3172             xr = a[j] - a[k];
   3173             xi = a[j + 1] + a[k + 1];
   3174             yr = Mult_W_W(wkr, xr) + Mult_W_W(wki, xi);
   3175             yi = Mult_W_W(wkr, xi) - Mult_W_W(wki, xr);
   3176             a[j] -= yr;
   3177             a[j + 1] -= yi;
   3178             a[k] += yr;
   3179             a[k + 1] -= yi;
   3180             wdr += Mult_W_W(ss, wki);
   3181             wdi += picofftsg_mult_w_w(ss, (PICOFFTSG_WGT_SHIFT2 - wkr));
   3182         }
   3183         if (i0 == 4) {
   3184             break;
   3185         }
   3186     }
   3187     xr = a[2] - a[n - 2];
   3188     xi = a[3] + a[n - 1];
   3189     yr = Mult_W_W(wdr, xr) + Mult_W_W(wdi, xi);
   3190     yi = Mult_W_W(wdr, xi) - Mult_W_W(wdi, xr);
   3191     a[2] -= yr;
   3192     a[3] -= yi;
   3193     a[n - 2] += yr;
   3194     a[n - 1] -= yi;
   3195 }
   3196 
   3197 
   3198 void dctsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
   3199 {
   3200     picoos_int32 i, i0, j, k, m;
   3201     PICOFFTSG_FFTTYPE w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
   3202     wkr = (PICOFFTSG_FFTTYPE)(0.5*PICODSP_WGT_SHIFT);
   3203     wki = (PICOFFTSG_FFTTYPE)(0.5*PICODSP_WGT_SHIFT);
   3204 
   3205     switch (n) {
   3206         case 8 :  wdi=(PICOFFTSG_FFTTYPE)(0.587937772274*PICODSP_WGT_SHIFT);
   3207             wdr=(PICOFFTSG_FFTTYPE)(0.392847478390*PICODSP_WGT_SHIFT); w1r=(PICOFFTSG_FFTTYPE)(0.980785250664*PICODSP_WGT_SHIFT);
   3208             w1i=(PICOFFTSG_FFTTYPE)(0.195090323687*PICODSP_WGT_SHIFT); ss =(PICOFFTSG_FFTTYPE)(0.390180647373*PICODSP_WGT_SHIFT); break;
   3209         case 16 : wdi=(PICOFFTSG_FFTTYPE)(0.546600937843*PICODSP_WGT_SHIFT);
   3210             wdr=(PICOFFTSG_FFTTYPE)(0.448583781719*PICODSP_WGT_SHIFT); w1r=(PICOFFTSG_FFTTYPE)(0.995184719563*PICODSP_WGT_SHIFT);
   3211             w1i=(PICOFFTSG_FFTTYPE)(0.098017141223*PICODSP_WGT_SHIFT); ss =(PICOFFTSG_FFTTYPE)(0.196034282446*PICODSP_WGT_SHIFT); break;
   3212         case 32 : wdi=(PICOFFTSG_FFTTYPE)(0.523931562901*PICODSP_WGT_SHIFT);
   3213             wdr=(PICOFFTSG_FFTTYPE)(0.474863886833*PICODSP_WGT_SHIFT); w1r=(PICOFFTSG_FFTTYPE)(0.998795449734*PICODSP_WGT_SHIFT);
   3214             w1i=(PICOFFTSG_FFTTYPE)(0.049067676067*PICODSP_WGT_SHIFT); ss =(PICOFFTSG_FFTTYPE)(0.098135352135*PICODSP_WGT_SHIFT); break;
   3215         case 64 : wdi=(PICOFFTSG_FFTTYPE)(0.512120008469*PICODSP_WGT_SHIFT);
   3216             wdr=(PICOFFTSG_FFTTYPE)(0.487578809261*PICODSP_WGT_SHIFT); w1r=(PICOFFTSG_FFTTYPE)(0.999698817730*PICODSP_WGT_SHIFT);
   3217             w1i=(PICOFFTSG_FFTTYPE)(0.024541229010*PICODSP_WGT_SHIFT); ss =(PICOFFTSG_FFTTYPE)(0.049082458019*PICODSP_WGT_SHIFT); break;
   3218         default:
   3219             wdr = 0; wdi = 0; ss = 0; break;
   3220     }
   3221 
   3222     m = n >> 1;
   3223     i = 0;
   3224     for (;;) {
   3225         i0 = i + DCST_LOOP_DIV2;
   3226         if (i0 > m - 2) {
   3227             i0 = m - 2;
   3228         }
   3229         for (j = i + 2; j <= i0; j += 2) {
   3230             k = n - j;
   3231             xr = picofftsg_mult_w_a(wdi, a[j - 1]) - picofftsg_mult_w_a(wdr, a[k + 1]);
   3232             xi = picofftsg_mult_w_a(wdr, a[j - 1]) + picofftsg_mult_w_a(wdi, a[k + 1]);
   3233             wkr -= Mult_W_W(ss, wdi);
   3234             wki += Mult_W_W(ss, wdr);
   3235             yr = Mult_W_W(wki, a[j]) - Mult_W_W(wkr, a[k]);
   3236             yi = Mult_W_W(wkr, a[j]) + Mult_W_W(wki, a[k]);
   3237             wdr -= Mult_W_W(ss, wki);
   3238             wdi += Mult_W_W(ss, wkr);
   3239             a[k + 1] = xr;
   3240             a[k] = yr;
   3241             a[j - 1] = xi;
   3242             a[j] = yi;
   3243         }
   3244         if (i0 == m - 2) {
   3245             break;
   3246         }
   3247     }
   3248     xr = picofftsg_mult_w_a(wdi, a[m - 1]) - picofftsg_mult_w_a(wdr, a[m + 1]);
   3249     a[m - 1] = picofftsg_mult_w_a(wdr, a[m - 1]) + picofftsg_mult_w_a(wdi, a[m + 1]);
   3250     a[m + 1] = xr;
   3251     a[m] = Mult_W_W(WR5000, a[m]);
   3252 }
   3253 
   3254 
   3255 void dctsub4(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
   3256 {
   3257     picoos_int32 m;
   3258     PICOFFTSG_FFTTYPE wki, wdr, wdi, xr;
   3259 
   3260     wki = WR5000;
   3261     m = n >> 1;
   3262     if (m == 2) {
   3263         wdr = Mult_W_W(wki, WI2500);
   3264         wdi = Mult_W_W(wki, WR2500);
   3265         xr = Mult_W_W(wdi, a[1]) - Mult_W_W(wdr, a[3]);
   3266         a[1] = Mult_W_W(wdr, a[1]) + Mult_W_W(wdi, a[3]);
   3267         a[3] = xr;
   3268     }
   3269     a[m] = Mult_W_W(wki, a[m]);
   3270 }
   3271 
   3272 #ifdef __cplusplus
   3273 }
   3274 #endif
   3275