Home | History | Annotate | Download | only in libmpdec
      1 /*
      2  * Copyright (c) 2008-2016 Stefan Krah. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  *
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  *
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
     16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 
     29 #include "mpdecimal.h"
     30 #include <stdio.h>
     31 #include <stdlib.h>
     32 #include <assert.h>
     33 #include "bits.h"
     34 #include "difradix2.h"
     35 #include "numbertheory.h"
     36 #include "transpose.h"
     37 #include "umodarith.h"
     38 #include "sixstep.h"
     39 
     40 
     41 /* Bignum: Cache efficient Matrix Fourier Transform for arrays of the
     42    form 2**n (See literature/six-step.txt). */
     43 
     44 
     45 /* forward transform with sign = -1 */
     46 int
     47 six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
     48 {
     49     struct fnt_params *tparams;
     50     mpd_size_t log2n, C, R;
     51     mpd_uint_t kernel;
     52     mpd_uint_t umod;
     53 #ifdef PPRO
     54     double dmod;
     55     uint32_t dinvmod[3];
     56 #endif
     57     mpd_uint_t *x, w0, w1, wstep;
     58     mpd_size_t i, k;
     59 
     60 
     61     assert(ispower2(n));
     62     assert(n >= 16);
     63     assert(n <= MPD_MAXTRANSFORM_2N);
     64 
     65     log2n = mpd_bsr(n);
     66     C = ((mpd_size_t)1) << (log2n / 2);  /* number of columns */
     67     R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */
     68 
     69 
     70     /* Transpose the matrix. */
     71     if (!transpose_pow2(a, R, C)) {
     72         return 0;
     73     }
     74 
     75     /* Length R transform on the rows. */
     76     if ((tparams = _mpd_init_fnt_params(R, -1, modnum)) == NULL) {
     77         return 0;
     78     }
     79     for (x = a; x < a+n; x += R) {
     80         fnt_dif2(x, R, tparams);
     81     }
     82 
     83     /* Transpose the matrix. */
     84     if (!transpose_pow2(a, C, R)) {
     85         mpd_free(tparams);
     86         return 0;
     87     }
     88 
     89     /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
     90     SETMODULUS(modnum);
     91     kernel = _mpd_getkernel(n, -1, modnum);
     92     for (i = 1; i < R; i++) {
     93         w0 = 1;                  /* r**(i*0): initial value for k=0 */
     94         w1 = POWMOD(kernel, i);  /* r**(i*1): initial value for k=1 */
     95         wstep = MULMOD(w1, w1);  /* r**(2*i) */
     96         for (k = 0; k < C; k += 2) {
     97             mpd_uint_t x0 = a[i*C+k];
     98             mpd_uint_t x1 = a[i*C+k+1];
     99             MULMOD2(&x0, w0, &x1, w1);
    100             MULMOD2C(&w0, &w1, wstep);  /* r**(i*(k+2)) = r**(i*k) * r**(2*i) */
    101             a[i*C+k] = x0;
    102             a[i*C+k+1] = x1;
    103         }
    104     }
    105 
    106     /* Length C transform on the rows. */
    107     if (C != R) {
    108         mpd_free(tparams);
    109         if ((tparams = _mpd_init_fnt_params(C, -1, modnum)) == NULL) {
    110             return 0;
    111         }
    112     }
    113     for (x = a; x < a+n; x += C) {
    114         fnt_dif2(x, C, tparams);
    115     }
    116     mpd_free(tparams);
    117 
    118 #if 0
    119     /* An unordered transform is sufficient for convolution. */
    120     /* Transpose the matrix. */
    121     if (!transpose_pow2(a, R, C)) {
    122         return 0;
    123     }
    124 #endif
    125 
    126     return 1;
    127 }
    128 
    129 
    130 /* reverse transform, sign = 1 */
    131 int
    132 inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
    133 {
    134     struct fnt_params *tparams;
    135     mpd_size_t log2n, C, R;
    136     mpd_uint_t kernel;
    137     mpd_uint_t umod;
    138 #ifdef PPRO
    139     double dmod;
    140     uint32_t dinvmod[3];
    141 #endif
    142     mpd_uint_t *x, w0, w1, wstep;
    143     mpd_size_t i, k;
    144 
    145 
    146     assert(ispower2(n));
    147     assert(n >= 16);
    148     assert(n <= MPD_MAXTRANSFORM_2N);
    149 
    150     log2n = mpd_bsr(n);
    151     C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */
    152     R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */
    153 
    154 
    155 #if 0
    156     /* An unordered transform is sufficient for convolution. */
    157     /* Transpose the matrix, producing an R*C matrix. */
    158     if (!transpose_pow2(a, C, R)) {
    159         return 0;
    160     }
    161 #endif
    162 
    163     /* Length C transform on the rows. */
    164     if ((tparams = _mpd_init_fnt_params(C, 1, modnum)) == NULL) {
    165         return 0;
    166     }
    167     for (x = a; x < a+n; x += C) {
    168         fnt_dif2(x, C, tparams);
    169     }
    170 
    171     /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
    172     SETMODULUS(modnum);
    173     kernel = _mpd_getkernel(n, 1, modnum);
    174     for (i = 1; i < R; i++) {
    175         w0 = 1;
    176         w1 = POWMOD(kernel, i);
    177         wstep = MULMOD(w1, w1);
    178         for (k = 0; k < C; k += 2) {
    179             mpd_uint_t x0 = a[i*C+k];
    180             mpd_uint_t x1 = a[i*C+k+1];
    181             MULMOD2(&x0, w0, &x1, w1);
    182             MULMOD2C(&w0, &w1, wstep);
    183             a[i*C+k] = x0;
    184             a[i*C+k+1] = x1;
    185         }
    186     }
    187 
    188     /* Transpose the matrix. */
    189     if (!transpose_pow2(a, R, C)) {
    190         mpd_free(tparams);
    191         return 0;
    192     }
    193 
    194     /* Length R transform on the rows. */
    195     if (R != C) {
    196         mpd_free(tparams);
    197         if ((tparams = _mpd_init_fnt_params(R, 1, modnum)) == NULL) {
    198             return 0;
    199         }
    200     }
    201     for (x = a; x < a+n; x += R) {
    202         fnt_dif2(x, R, tparams);
    203     }
    204     mpd_free(tparams);
    205 
    206     /* Transpose the matrix. */
    207     if (!transpose_pow2(a, C, R)) {
    208         return 0;
    209     }
    210 
    211     return 1;
    212 }
    213 
    214 
    215