/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

/*
 * fft.c
 *
 * Fast Fourier Transform
 *
 */


#include "fft.h"

     21 const int16_t kSortTabFft[240] = {
     22   0, 60, 120, 180, 20, 80, 140, 200, 40, 100, 160, 220,
     23   4, 64, 124, 184, 24, 84, 144, 204, 44, 104, 164, 224,
     24   8, 68, 128, 188, 28, 88, 148, 208, 48, 108, 168, 228,
     25   12, 72, 132, 192, 32, 92, 152, 212, 52, 112, 172, 232,
     26   16, 76, 136, 196, 36, 96, 156, 216, 56, 116, 176, 236,
     27   1, 61, 121, 181, 21, 81, 141, 201, 41, 101, 161, 221,
     28   5, 65, 125, 185, 25, 85, 145, 205, 45, 105, 165, 225,
     29   9, 69, 129, 189, 29, 89, 149, 209, 49, 109, 169, 229,
     30   13, 73, 133, 193, 33, 93, 153, 213, 53, 113, 173, 233,
     31   17, 77, 137, 197, 37, 97, 157, 217, 57, 117, 177, 237,
     32   2, 62, 122, 182, 22, 82, 142, 202, 42, 102, 162, 222,
     33   6, 66, 126, 186, 26, 86, 146, 206, 46, 106, 166, 226,
     34   10, 70, 130, 190, 30, 90, 150, 210, 50, 110, 170, 230,
     35   14, 74, 134, 194, 34, 94, 154, 214, 54, 114, 174, 234,
     36   18, 78, 138, 198, 38, 98, 158, 218, 58, 118, 178, 238,
     37   3, 63, 123, 183, 23, 83, 143, 203, 43, 103, 163, 223,
     38   7, 67, 127, 187, 27, 87, 147, 207, 47, 107, 167, 227,
     39   11, 71, 131, 191, 31, 91, 151, 211, 51, 111, 171, 231,
     40   15, 75, 135, 195, 35, 95, 155, 215, 55, 115, 175, 235,
     41   19, 79, 139, 199, 39, 99, 159, 219, 59, 119, 179, 239
     42 };
     43 
     44 /* Cosine table in Q14 */
     45 const int16_t kCosTabFfftQ14[240] = {
     46   16384,  16378, 16362,   16333,  16294,  16244,  16182,  16110,  16026,  15931,  15826,  15709,
     47   15582,  15444, 15296,   15137,  14968,  14788,  14598,  14399,  14189,  13970,  13741,  13502,
     48   13255,  12998, 12733,   12458,  12176,  11885,  11585,  11278,  10963,  10641,  10311,   9974,
     49   9630,   9280,  8923,    8561,   8192,   7818,   7438,   7053,   6664,   6270,   5872,   5469,
     50   5063,   4653,  4240,    3825,   3406,   2986,   2563,   2139,   1713,   1285,    857,    429,
     51   0,   -429,  -857,   -1285,  -1713,  -2139,  -2563,  -2986,  -3406,  -3825,  -4240,  -4653,
     52   -5063,  -5469, -5872,   -6270,  -6664,  -7053,  -7438,  -7818,  -8192,  -8561,  -8923,  -9280,
     53   -9630,  -9974, -10311, -10641, -10963, -11278, -11585, -11885, -12176, -12458, -12733, -12998,
     54   -13255, -13502, -13741, -13970, -14189, -14399, -14598, -14788, -14968, -15137, -15296, -15444,
     55   -15582, -15709, -15826, -15931, -16026, -16110, -16182, -16244, -16294, -16333, -16362, -16378,
     56   -16384, -16378, -16362, -16333, -16294, -16244, -16182, -16110, -16026, -15931, -15826, -15709,
     57   -15582, -15444, -15296, -15137, -14968, -14788, -14598, -14399, -14189, -13970, -13741, -13502,
     58   -13255, -12998, -12733, -12458, -12176, -11885, -11585, -11278, -10963, -10641, -10311,  -9974,
     59   -9630,  -9280,  -8923,  -8561,  -8192,  -7818,  -7438,  -7053,  -6664,  -6270,  -5872,  -5469,
     60   -5063,  -4653,  -4240,  -3825,  -3406,  -2986,  -2563,  -2139,  -1713,  -1285,   -857,   -429,
     61   0,    429,    857,   1285,   1713,   2139,   2563,   2986,   3406,   3825,   4240,   4653,
     62   5063,   5469,   5872,   6270,   6664,   7053,   7438,   7818,   8192,   8561,   8923,   9280,
     63   9630,   9974,  10311,  10641,  10963,  11278,  11585,  11885,  12176,  12458,  12733,  12998,
     64   13255,  13502,  13741,  13970,  14189,  14399,  14598,  14788,  14968,  15137,  15296,  15444,
     65   15582,  15709,  15826,  15931,  16026,  16110,  16182,  16244,  16294,  16333,  16362,  16378
     66 };
     67 
     68 
     69 
     70 /* Uses 16x16 mul, without rounding, which is faster. Uses WEBRTC_SPL_MUL_16_16_RSFT */
     71 int16_t WebRtcIsacfix_FftRadix16Fastest(int16_t RexQx[], int16_t ImxQx[], int16_t iSign) {
     72 
     73   int16_t dd, ee, ff, gg, hh, ii;
     74   int16_t k0, k1, k2, k3, k4, kk;
     75   int16_t tmp116, tmp216;
     76 
     77   int16_t ccc1Q14, ccc2Q14, ccc3Q14, sss1Q14, sss2Q14, sss3Q14;
     78   int16_t sss60Q14, ccc72Q14, sss72Q14;
     79   int16_t aaQx, ajQx, akQx, ajmQx, ajpQx, akmQx, akpQx;
     80   int16_t bbQx, bjQx, bkQx, bjmQx, bjpQx, bkmQx, bkpQx;
     81 
     82   int16_t ReDATAQx[240],  ImDATAQx[240];
     83 
     84   sss60Q14 = kCosTabFfftQ14[20];
     85   ccc72Q14 = kCosTabFfftQ14[48];
     86   sss72Q14 = kCosTabFfftQ14[12];
     87 
     88   if (iSign < 0) {
     89     sss72Q14 = -sss72Q14;
     90     sss60Q14 = -sss60Q14;
     91   }
     92   /* Complexity is: 10 cycles */
     93 
     94   /* compute fourier transform */
     95 
     96   // transform for factor of 4
     97   for (kk=0; kk<60; kk++) {
     98     k0 = kk;
     99     k1 = k0 + 60;
    100     k2 = k1 + 60;
    101     k3 = k2 + 60;
    102 
    103     akpQx = RexQx[k0] + RexQx[k2];
    104     akmQx = RexQx[k0] - RexQx[k2];
    105     ajpQx = RexQx[k1] + RexQx[k3];
    106     ajmQx = RexQx[k1] - RexQx[k3];
    107     bkpQx = ImxQx[k0] + ImxQx[k2];
    108     bkmQx = ImxQx[k0] - ImxQx[k2];
    109     bjpQx = ImxQx[k1] + ImxQx[k3];
    110     bjmQx = ImxQx[k1] - ImxQx[k3];
    111 
    112     RexQx[k0] = akpQx + ajpQx;
    113     ImxQx[k0] = bkpQx + bjpQx;
    114     ajpQx = akpQx - ajpQx;
    115     bjpQx = bkpQx - bjpQx;
    116     if (iSign < 0) {
    117       akpQx = akmQx + bjmQx;
    118       bkpQx = bkmQx - ajmQx;
    119       akmQx -= bjmQx;
    120       bkmQx += ajmQx;
    121     } else {
    122       akpQx = akmQx - bjmQx;
    123       bkpQx = bkmQx + ajmQx;
    124       akmQx += bjmQx;
    125       bkmQx -= ajmQx;
    126     }
    127 
    128     ccc1Q14 = kCosTabFfftQ14[kk];
    129     ccc2Q14 = kCosTabFfftQ14[2 * kk];
    130     ccc3Q14 = kCosTabFfftQ14[3 * kk];
    131     sss1Q14 = kCosTabFfftQ14[kk + 60];
    132     sss2Q14 = kCosTabFfftQ14[2 * kk + 60];
    133     sss3Q14 = kCosTabFfftQ14[3 * kk + 60];
    134     if (iSign==1) {
    135       sss1Q14 = -sss1Q14;
    136       sss2Q14 = -sss2Q14;
    137       sss3Q14 = -sss3Q14;
    138     }
    139 
    140     //Do several multiplications like Q14*Q16>>14 = Q16
    141     // RexQ16[k1] = akpQ16 * ccc1Q14 - bkpQ16 * sss1Q14;
    142     // RexQ16[k2] = ajpQ16 * ccc2Q14 - bjpQ16 * sss2Q14;
    143     // RexQ16[k3] = akmQ16 * ccc3Q14 - bkmQ16 * sss3Q14;
    144     // ImxQ16[k1] = akpQ16 * sss1Q14 + bkpQ16 * ccc1Q14;
    145     // ImxQ16[k2] = ajpQ16 * sss2Q14 + bjpQ16 * ccc2Q14;
    146     // ImxQ16[k3] = akmQ16 * sss3Q14 + bkmQ16 * ccc3Q14;
    147 
    148     RexQx[k1] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc1Q14, akpQx, 14) -
    149         (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss1Q14, bkpQx, 14); // 6 non-mul + 2 mul cycles, i.e. 8 cycles (6+2*7=20 cycles if 16x32mul)
    150     RexQx[k2] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, ajpQx, 14) -
    151         (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, bjpQx, 14);
    152     RexQx[k3] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc3Q14, akmQx, 14) -
    153         (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss3Q14, bkmQx, 14);
    154     ImxQx[k1] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss1Q14, akpQx, 14) +
    155         (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc1Q14, bkpQx, 14);
    156     ImxQx[k2] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, ajpQx, 14) +
    157         (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, bjpQx, 14);
    158     ImxQx[k3] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss3Q14, akmQx, 14) +
    159         (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc3Q14, bkmQx, 14);
    160     //This mul segment needs 6*8 = 48 cycles for 16x16 muls, but 6*20 = 120 cycles for 16x32 muls
    161 
    162 
    163   }
    164   /* Complexity is: 51+48 = 99 cycles for 16x16 muls, but 51+120 = 171 cycles for 16x32 muls*/
    165 
    166   // transform for factor of 3
    167   kk=0;
    168   k1=20;
    169   k2=40;
    170 
    171   for (hh=0; hh<4; hh++) {
    172     for (ii=0; ii<20; ii++) {
    173       akQx = RexQx[kk];
    174       bkQx = ImxQx[kk];
    175       ajQx = RexQx[k1] + RexQx[k2];
    176       bjQx = ImxQx[k1] + ImxQx[k2];
    177       RexQx[kk] = akQx + ajQx;
    178       ImxQx[kk] = bkQx + bjQx;
    179       tmp116 = ajQx >> 1;
    180       tmp216 = bjQx >> 1;
    181       akQx = akQx - tmp116;
    182       bkQx = bkQx - tmp216;
    183       tmp116 = RexQx[k1] - RexQx[k2];
    184       tmp216 = ImxQx[k1] - ImxQx[k2];
    185 
    186       ajQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss60Q14, tmp116, 14); // Q14*Qx>>14 = Qx
    187       bjQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss60Q14, tmp216, 14); // Q14*Qx>>14 = Qx
    188       RexQx[k1] = akQx - bjQx;
    189       RexQx[k2] = akQx + bjQx;
    190       ImxQx[k1] = bkQx + ajQx;
    191       ImxQx[k2] = bkQx - ajQx;
    192 
    193       kk++;
    194       k1++;
    195       k2++;
    196     }
    197     /* Complexity : (31+6)*20 = 740 cycles for 16x16 muls, but (31+18)*20 = 980 cycles for 16x32 muls*/
    198     kk=kk+40;
    199     k1=k1+40;
    200     k2=k2+40;
    201   }
    202   /* Complexity : 4*(740+3) = 2972 cycles for 16x16 muls, but 4*(980+3) = 3932 cycles for 16x32 muls*/
    203 
    204   /* multiply by rotation factor for odd factor 3 or 5 (not for 4)
    205      Same code (duplicated) for both ii=2 and ii=3 */
    206   kk = 1;
    207   ee = 0;
    208   ff = 0;
    209 
    210   for (gg=0; gg<19; gg++) {
    211     kk += 20;
    212     ff = ff+4;
    213     for (hh=0; hh<2; hh++) {
    214       ee = ff + hh * ff;
    215       dd = ee + 60;
    216       ccc2Q14 = kCosTabFfftQ14[ee];
    217       sss2Q14 = kCosTabFfftQ14[dd];
    218       if (iSign==1) {
    219         sss2Q14 = -sss2Q14;
    220       }
    221       for (ii=0; ii<4; ii++) {
    222         akQx = RexQx[kk];
    223         bkQx = ImxQx[kk];
    224         RexQx[kk] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, akQx, 14) - // Q14*Qx>>14 = Qx
    225             (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, bkQx, 14);
    226         ImxQx[kk] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, akQx, 14) + // Q14*Qx>>14 = Qx
    227             (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, bkQx, 14);
    228 
    229 
    230         kk += 60;
    231       }
    232       kk = kk - 220;
    233     }
    234     // Complexity: 2*(13+5+4*13+2) = 144 for 16x16 muls, but 2*(13+5+4*33+2) = 304 cycles for 16x32 muls
    235     kk = kk - 59;
    236   }
    237   // Complexity: 19*144 = 2736 for 16x16 muls, but 19*304 = 5776 cycles for 16x32 muls
    238 
    239   // transform for factor of 5
    240   kk = 0;
    241   ccc2Q14 = kCosTabFfftQ14[96];
    242   sss2Q14 = kCosTabFfftQ14[84];
    243   if (iSign==1) {
    244     sss2Q14 = -sss2Q14;
    245   }
    246 
    247   for (hh=0; hh<4; hh++) {
    248     for (ii=0; ii<12; ii++) {
    249       k1 = kk + 4;
    250       k2 = k1 + 4;
    251       k3 = k2 + 4;
    252       k4 = k3 + 4;
    253 
    254       akpQx = RexQx[k1] + RexQx[k4];
    255       akmQx = RexQx[k1] - RexQx[k4];
    256       bkpQx = ImxQx[k1] + ImxQx[k4];
    257       bkmQx = ImxQx[k1] - ImxQx[k4];
    258       ajpQx = RexQx[k2] + RexQx[k3];
    259       ajmQx = RexQx[k2] - RexQx[k3];
    260       bjpQx = ImxQx[k2] + ImxQx[k3];
    261       bjmQx = ImxQx[k2] - ImxQx[k3];
    262       aaQx = RexQx[kk];
    263       bbQx = ImxQx[kk];
    264       RexQx[kk] = aaQx + akpQx + ajpQx;
    265       ImxQx[kk] = bbQx + bkpQx + bjpQx;
    266 
    267       akQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc72Q14, akpQx, 14) +
    268           (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, ajpQx, 14)  + aaQx;
    269       bkQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc72Q14, bkpQx, 14) +
    270           (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, bjpQx, 14)  + bbQx;
    271       ajQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss72Q14, akmQx, 14) +
    272           (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, ajmQx, 14);
    273       bjQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss72Q14, bkmQx, 14) +
    274           (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, bjmQx, 14);
    275       // 32+4*8=64 or 32+4*20=112
    276 
    277       RexQx[k1] = akQx - bjQx;
    278       RexQx[k4] = akQx + bjQx;
    279       ImxQx[k1] = bkQx + ajQx;
    280       ImxQx[k4] = bkQx - ajQx;
    281 
    282       akQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, akpQx, 14)  +
    283           (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc72Q14, ajpQx, 14) + aaQx;
    284       bkQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, bkpQx, 14)  +
    285           (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc72Q14, bjpQx, 14) + bbQx;
    286       ajQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, akmQx, 14) -
    287           (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss72Q14, ajmQx, 14);
    288       bjQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, bkmQx, 14) -
    289           (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss72Q14, bjmQx, 14);
    290       // 8+4*8=40 or 8+4*20=88
    291 
    292       RexQx[k2] = akQx - bjQx;
    293       RexQx[k3] = akQx + bjQx;
    294       ImxQx[k2] = bkQx + ajQx;
    295       ImxQx[k3] = bkQx - ajQx;
    296 
    297       kk = k4 + 4;
    298     }
    299     // Complexity: 12*(64+40+10) = 1368 for 16x16 muls, but 12*(112+88+10) = 2520 cycles for 16x32 muls
    300     kk -= 239;
    301   }
    302   // Complexity: 4*1368 = 5472 for 16x16 muls, but 4*2520 = 10080 cycles for 16x32 muls
    303 
    304   /* multiply by rotation factor for odd factor 3 or 5 (not for 4)
    305      Same code (duplicated) for both ii=2 and ii=3 */
    306   kk = 1;
    307   ee=0;
    308 
    309   for (gg=0; gg<3; gg++) {
    310     kk += 4;
    311     dd = 12 + 12 * gg;
    312     ff = 0;
    313     for (hh=0; hh<4; hh++) {
    314       ff = ff+dd;
    315       ee = ff+60;
    316       for (ii=0; ii<12; ii++) {
    317         akQx = RexQx[kk];
    318         bkQx = ImxQx[kk];
    319 
    320         ccc2Q14 = kCosTabFfftQ14[ff];
    321         sss2Q14 = kCosTabFfftQ14[ee];
    322 
    323         if (iSign==1) {
    324           sss2Q14 = -sss2Q14;
    325         }
    326 
    327         RexQx[kk] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, akQx, 14) -
    328             (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, bkQx, 14);
    329         ImxQx[kk] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, akQx, 14) +
    330             (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, bkQx, 14);
    331 
    332         kk += 20;
    333       }
    334       kk = kk - 236;
    335       // Complexity: 12*(12+12) = 288 for 16x16 muls, but 12*(12+32) = 528 cycles for 16x32 muls
    336     }
    337     kk = kk - 19;
    338     // Complexity: 4*288+6 for 16x16 muls, but 4*528+6 cycles for 16x32 muls
    339   }
    340   // Complexity: 3*4*288+6 = 3462 for 16x16 muls, but 3*4*528+6 = 6342 cycles for 16x32 muls
    341 
    342 
    343   // last transform for factor of 4 */
    344   for (kk=0; kk<240; kk=kk+4) {
    345     k1 = kk + 1;
    346     k2 = k1 + 1;
    347     k3 = k2 + 1;
    348 
    349     akpQx = RexQx[kk] + RexQx[k2];
    350     akmQx = RexQx[kk] - RexQx[k2];
    351     ajpQx = RexQx[k1] + RexQx[k3];
    352     ajmQx = RexQx[k1] - RexQx[k3];
    353     bkpQx = ImxQx[kk] + ImxQx[k2];
    354     bkmQx = ImxQx[kk] - ImxQx[k2];
    355     bjpQx = ImxQx[k1] + ImxQx[k3];
    356     bjmQx = ImxQx[k1] - ImxQx[k3];
    357     RexQx[kk] = akpQx + ajpQx;
    358     ImxQx[kk] = bkpQx + bjpQx;
    359     ajpQx = akpQx - ajpQx;
    360     bjpQx = bkpQx - bjpQx;
    361     if (iSign < 0) {
    362       akpQx = akmQx + bjmQx;
    363       bkpQx = bkmQx - ajmQx;
    364       akmQx -= bjmQx;
    365       bkmQx += ajmQx;
    366     } else {
    367       akpQx = akmQx - bjmQx;
    368       bkpQx = bkmQx + ajmQx;
    369       akmQx += bjmQx;
    370       bkmQx -= ajmQx;
    371     }
    372     RexQx[k1] = akpQx;
    373     RexQx[k2] = ajpQx;
    374     RexQx[k3] = akmQx;
    375     ImxQx[k1] = bkpQx;
    376     ImxQx[k2] = bjpQx;
    377     ImxQx[k3] = bkmQx;
    378   }
    379   // Complexity: 60*45 = 2700 for 16x16 muls, but 60*45 = 2700 cycles for 16x32 muls
    380 
    381   /* permute the results to normal order */
    382   for (ii=0; ii<240; ii++) {
    383     ReDATAQx[ii]=RexQx[ii];
    384     ImDATAQx[ii]=ImxQx[ii];
    385   }
    386   // Complexity: 240*2=480 cycles
    387 
    388   for (ii=0; ii<240; ii++) {
    389     RexQx[ii]=ReDATAQx[kSortTabFft[ii]];
    390     ImxQx[ii]=ImDATAQx[kSortTabFft[ii]];
    391   }
    392   // Complexity: 240*2*2=960 cycles
    393 
    394   // Total complexity:
    395   //            16x16 16x32
    396   // Complexity:   10    10
    397   // Complexity:   99   171
    398   // Complexity: 2972  3932
    399   // Complexity: 2736  5776
    400   // Complexity: 5472 10080
    401   // Complexity: 3462  6342
    402   // Complexity: 2700  2700
    403   // Complexity:  480   480
    404   // Complexity:  960   960
    405   // =======================
    406   //            18891 30451
    407   //
    408   // If this FFT is called 2 time each frame, i.e. 67 times per second, it will correspond to
    409   // a C54 complexity of 67*18891/1000000 = 1.27 MIPS with 16x16-muls, and 67*30451/1000000 =
    410   // = 2.04 MIPS with 16x32-muls. Note that this routine somtimes is called 6 times during the
    411   // encoding of a frame, i.e. the max complexity would be 7/2*1.27 = 4.4 MIPS for the 16x16 mul case.
    412 
    413 
    414   return 0;
    415 }
    416