Home | History | Annotate | Download | only in libmpdec
      1 /*
      2  * Copyright (c) 2008-2016 Stefan Krah. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  *
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  *
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
     16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 
     29 #include "mpdecimal.h"
     30 #include <stdio.h>
     31 #include <assert.h>
     32 #include "bits.h"
     33 #include "numbertheory.h"
     34 #include "umodarith.h"
     35 #include "difradix2.h"
     36 
     37 
     38 /* Bignum: The actual transform routine (decimation in frequency). */
     39 
     40 
     41 /*
     42  * Generate index pairs (x, bitreverse(x)) and carry out the permutation.
     43  * n must be a power of two.
     44  * Algorithm due to Brent/Lehmann, see Joerg Arndt, "Matters Computational",
     45  * Chapter 1.14.4. [http://www.jjj.de/fxt/]
     46  */
     47 static inline void
     48 bitreverse_permute(mpd_uint_t a[], mpd_size_t n)
     49 {
     50     mpd_size_t x = 0;
     51     mpd_size_t r = 0;
     52     mpd_uint_t t;
     53 
     54     do { /* Invariant: r = bitreverse(x) */
     55         if (r > x) {
     56             t = a[x];
     57             a[x] = a[r];
     58             a[r] = t;
     59         }
     60         /* Flip trailing consecutive 1 bits and the first zero bit
     61          * that absorbs a possible carry. */
     62         x += 1;
     63         /* Mirror the operation on r: Flip n_trailing_zeros(x)+1
     64            high bits of r. */
     65         r ^= (n - (n >> (mpd_bsf(x)+1)));
     66         /* The loop invariant is preserved. */
     67     } while (x < n);
     68 }
     69 
     70 
     71 /* Fast Number Theoretic Transform, decimation in frequency. */
     72 void
     73 fnt_dif2(mpd_uint_t a[], mpd_size_t n, struct fnt_params *tparams)
     74 {
     75     mpd_uint_t *wtable = tparams->wtable;
     76     mpd_uint_t umod;
     77 #ifdef PPRO
     78     double dmod;
     79     uint32_t dinvmod[3];
     80 #endif
     81     mpd_uint_t u0, u1, v0, v1;
     82     mpd_uint_t w, w0, w1, wstep;
     83     mpd_size_t m, mhalf;
     84     mpd_size_t j, r;
     85 
     86 
     87     assert(ispower2(n));
     88     assert(n >= 4);
     89 
     90     SETMODULUS(tparams->modnum);
     91 
     92     /* m == n */
     93     mhalf = n / 2;
     94     for (j = 0; j < mhalf; j += 2) {
     95 
     96         w0 = wtable[j];
     97         w1 = wtable[j+1];
     98 
     99         u0 = a[j];
    100         v0 = a[j+mhalf];
    101 
    102         u1 = a[j+1];
    103         v1 = a[j+1+mhalf];
    104 
    105         a[j] = addmod(u0, v0, umod);
    106         v0 = submod(u0, v0, umod);
    107 
    108         a[j+1] = addmod(u1, v1, umod);
    109         v1 = submod(u1, v1, umod);
    110 
    111         MULMOD2(&v0, w0, &v1, w1);
    112 
    113         a[j+mhalf] = v0;
    114         a[j+1+mhalf] = v1;
    115 
    116     }
    117 
    118     wstep = 2;
    119     for (m = n/2; m >= 2; m>>=1, wstep<<=1) {
    120 
    121         mhalf = m / 2;
    122 
    123         /* j == 0 */
    124         for (r = 0; r < n; r += 2*m) {
    125 
    126             u0 = a[r];
    127             v0 = a[r+mhalf];
    128 
    129             u1 = a[m+r];
    130             v1 = a[m+r+mhalf];
    131 
    132             a[r] = addmod(u0, v0, umod);
    133             v0 = submod(u0, v0, umod);
    134 
    135             a[m+r] = addmod(u1, v1, umod);
    136             v1 = submod(u1, v1, umod);
    137 
    138             a[r+mhalf] = v0;
    139             a[m+r+mhalf] = v1;
    140         }
    141 
    142         for (j = 1; j < mhalf; j++) {
    143 
    144             w = wtable[j*wstep];
    145 
    146             for (r = 0; r < n; r += 2*m) {
    147 
    148                 u0 = a[r+j];
    149                 v0 = a[r+j+mhalf];
    150 
    151                 u1 = a[m+r+j];
    152                 v1 = a[m+r+j+mhalf];
    153 
    154                 a[r+j] = addmod(u0, v0, umod);
    155                 v0 = submod(u0, v0, umod);
    156 
    157                 a[m+r+j] = addmod(u1, v1, umod);
    158                 v1 = submod(u1, v1, umod);
    159 
    160                 MULMOD2C(&v0, &v1, w);
    161 
    162                 a[r+j+mhalf] = v0;
    163                 a[m+r+j+mhalf] = v1;
    164             }
    165 
    166         }
    167 
    168     }
    169 
    170     bitreverse_permute(a, n);
    171 }
    172 
    173 
    174