1 /* 2 * Copyright (c) 2008-2016 Stefan Krah. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 29 #include "mpdecimal.h" 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <assert.h> 33 #include "bits.h" 34 #include "difradix2.h" 35 #include "numbertheory.h" 36 #include "transpose.h" 37 #include "umodarith.h" 38 #include "sixstep.h" 39 40 41 /* Bignum: Cache efficient Matrix Fourier Transform for arrays of the 42 form 2**n (See literature/six-step.txt). */ 43 44 45 /* forward transform with sign = -1 */ 46 int 47 six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum) 48 { 49 struct fnt_params *tparams; 50 mpd_size_t log2n, C, R; 51 mpd_uint_t kernel; 52 mpd_uint_t umod; 53 #ifdef PPRO 54 double dmod; 55 uint32_t dinvmod[3]; 56 #endif 57 mpd_uint_t *x, w0, w1, wstep; 58 mpd_size_t i, k; 59 60 61 assert(ispower2(n)); 62 assert(n >= 16); 63 assert(n <= MPD_MAXTRANSFORM_2N); 64 65 log2n = mpd_bsr(n); 66 C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */ 67 R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */ 68 69 70 /* Transpose the matrix. */ 71 if (!transpose_pow2(a, R, C)) { 72 return 0; 73 } 74 75 /* Length R transform on the rows. */ 76 if ((tparams = _mpd_init_fnt_params(R, -1, modnum)) == NULL) { 77 return 0; 78 } 79 for (x = a; x < a+n; x += R) { 80 fnt_dif2(x, R, tparams); 81 } 82 83 /* Transpose the matrix. */ 84 if (!transpose_pow2(a, C, R)) { 85 mpd_free(tparams); 86 return 0; 87 } 88 89 /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */ 90 SETMODULUS(modnum); 91 kernel = _mpd_getkernel(n, -1, modnum); 92 for (i = 1; i < R; i++) { 93 w0 = 1; /* r**(i*0): initial value for k=0 */ 94 w1 = POWMOD(kernel, i); /* r**(i*1): initial value for k=1 */ 95 wstep = MULMOD(w1, w1); /* r**(2*i) */ 96 for (k = 0; k < C; k += 2) { 97 mpd_uint_t x0 = a[i*C+k]; 98 mpd_uint_t x1 = a[i*C+k+1]; 99 MULMOD2(&x0, w0, &x1, w1); 100 MULMOD2C(&w0, &w1, wstep); /* r**(i*(k+2)) = r**(i*k) * r**(2*i) */ 101 a[i*C+k] = x0; 102 a[i*C+k+1] = x1; 103 } 104 } 105 106 /* Length C transform on the rows. */ 107 if (C != R) { 108 mpd_free(tparams); 109 if ((tparams = _mpd_init_fnt_params(C, -1, modnum)) == NULL) { 110 return 0; 111 } 112 } 113 for (x = a; x < a+n; x += C) { 114 fnt_dif2(x, C, tparams); 115 } 116 mpd_free(tparams); 117 118 #if 0 119 /* An unordered transform is sufficient for convolution. */ 120 /* Transpose the matrix. */ 121 if (!transpose_pow2(a, R, C)) { 122 return 0; 123 } 124 #endif 125 126 return 1; 127 } 128 129 130 /* reverse transform, sign = 1 */ 131 int 132 inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum) 133 { 134 struct fnt_params *tparams; 135 mpd_size_t log2n, C, R; 136 mpd_uint_t kernel; 137 mpd_uint_t umod; 138 #ifdef PPRO 139 double dmod; 140 uint32_t dinvmod[3]; 141 #endif 142 mpd_uint_t *x, w0, w1, wstep; 143 mpd_size_t i, k; 144 145 146 assert(ispower2(n)); 147 assert(n >= 16); 148 assert(n <= MPD_MAXTRANSFORM_2N); 149 150 log2n = mpd_bsr(n); 151 C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */ 152 R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */ 153 154 155 #if 0 156 /* An unordered transform is sufficient for convolution. */ 157 /* Transpose the matrix, producing an R*C matrix. */ 158 if (!transpose_pow2(a, C, R)) { 159 return 0; 160 } 161 #endif 162 163 /* Length C transform on the rows. */ 164 if ((tparams = _mpd_init_fnt_params(C, 1, modnum)) == NULL) { 165 return 0; 166 } 167 for (x = a; x < a+n; x += C) { 168 fnt_dif2(x, C, tparams); 169 } 170 171 /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */ 172 SETMODULUS(modnum); 173 kernel = _mpd_getkernel(n, 1, modnum); 174 for (i = 1; i < R; i++) { 175 w0 = 1; 176 w1 = POWMOD(kernel, i); 177 wstep = MULMOD(w1, w1); 178 for (k = 0; k < C; k += 2) { 179 mpd_uint_t x0 = a[i*C+k]; 180 mpd_uint_t x1 = a[i*C+k+1]; 181 MULMOD2(&x0, w0, &x1, w1); 182 MULMOD2C(&w0, &w1, wstep); 183 a[i*C+k] = x0; 184 a[i*C+k+1] = x1; 185 } 186 } 187 188 /* Transpose the matrix. */ 189 if (!transpose_pow2(a, R, C)) { 190 mpd_free(tparams); 191 return 0; 192 } 193 194 /* Length R transform on the rows. */ 195 if (R != C) { 196 mpd_free(tparams); 197 if ((tparams = _mpd_init_fnt_params(R, 1, modnum)) == NULL) { 198 return 0; 199 } 200 } 201 for (x = a; x < a+n; x += R) { 202 fnt_dif2(x, R, tparams); 203 } 204 mpd_free(tparams); 205 206 /* Transpose the matrix. */ 207 if (!transpose_pow2(a, C, R)) { 208 return 0; 209 } 210 211 return 1; 212 } 213 214 215