1 /* 2 * Copyright (c) 2008-2016 Stefan Krah. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 29 #include "mpdecimal.h" 30 #include <stdio.h> 31 #include <assert.h> 32 #include "bits.h" 33 #include "numbertheory.h" 34 #include "umodarith.h" 35 #include "difradix2.h" 36 37 38 /* Bignum: The actual transform routine (decimation in frequency). */ 39 40 41 /* 42 * Generate index pairs (x, bitreverse(x)) and carry out the permutation. 43 * n must be a power of two. 44 * Algorithm due to Brent/Lehmann, see Joerg Arndt, "Matters Computational", 45 * Chapter 1.14.4. [http://www.jjj.de/fxt/] 46 */ 47 static inline void 48 bitreverse_permute(mpd_uint_t a[], mpd_size_t n) 49 { 50 mpd_size_t x = 0; 51 mpd_size_t r = 0; 52 mpd_uint_t t; 53 54 do { /* Invariant: r = bitreverse(x) */ 55 if (r > x) { 56 t = a[x]; 57 a[x] = a[r]; 58 a[r] = t; 59 } 60 /* Flip trailing consecutive 1 bits and the first zero bit 61 * that absorbs a possible carry. */ 62 x += 1; 63 /* Mirror the operation on r: Flip n_trailing_zeros(x)+1 64 high bits of r. */ 65 r ^= (n - (n >> (mpd_bsf(x)+1))); 66 /* The loop invariant is preserved. */ 67 } while (x < n); 68 } 69 70 71 /* Fast Number Theoretic Transform, decimation in frequency. */ 72 void 73 fnt_dif2(mpd_uint_t a[], mpd_size_t n, struct fnt_params *tparams) 74 { 75 mpd_uint_t *wtable = tparams->wtable; 76 mpd_uint_t umod; 77 #ifdef PPRO 78 double dmod; 79 uint32_t dinvmod[3]; 80 #endif 81 mpd_uint_t u0, u1, v0, v1; 82 mpd_uint_t w, w0, w1, wstep; 83 mpd_size_t m, mhalf; 84 mpd_size_t j, r; 85 86 87 assert(ispower2(n)); 88 assert(n >= 4); 89 90 SETMODULUS(tparams->modnum); 91 92 /* m == n */ 93 mhalf = n / 2; 94 for (j = 0; j < mhalf; j += 2) { 95 96 w0 = wtable[j]; 97 w1 = wtable[j+1]; 98 99 u0 = a[j]; 100 v0 = a[j+mhalf]; 101 102 u1 = a[j+1]; 103 v1 = a[j+1+mhalf]; 104 105 a[j] = addmod(u0, v0, umod); 106 v0 = submod(u0, v0, umod); 107 108 a[j+1] = addmod(u1, v1, umod); 109 v1 = submod(u1, v1, umod); 110 111 MULMOD2(&v0, w0, &v1, w1); 112 113 a[j+mhalf] = v0; 114 a[j+1+mhalf] = v1; 115 116 } 117 118 wstep = 2; 119 for (m = n/2; m >= 2; m>>=1, wstep<<=1) { 120 121 mhalf = m / 2; 122 123 /* j == 0 */ 124 for (r = 0; r < n; r += 2*m) { 125 126 u0 = a[r]; 127 v0 = a[r+mhalf]; 128 129 u1 = a[m+r]; 130 v1 = a[m+r+mhalf]; 131 132 a[r] = addmod(u0, v0, umod); 133 v0 = submod(u0, v0, umod); 134 135 a[m+r] = addmod(u1, v1, umod); 136 v1 = submod(u1, v1, umod); 137 138 a[r+mhalf] = v0; 139 a[m+r+mhalf] = v1; 140 } 141 142 for (j = 1; j < mhalf; j++) { 143 144 w = wtable[j*wstep]; 145 146 for (r = 0; r < n; r += 2*m) { 147 148 u0 = a[r+j]; 149 v0 = a[r+j+mhalf]; 150 151 u1 = a[m+r+j]; 152 v1 = a[m+r+j+mhalf]; 153 154 a[r+j] = addmod(u0, v0, umod); 155 v0 = submod(u0, v0, umod); 156 157 a[m+r+j] = addmod(u1, v1, umod); 158 v1 = submod(u1, v1, umod); 159 160 MULMOD2C(&v0, &v1, w); 161 162 a[r+j+mhalf] = v0; 163 a[m+r+j+mhalf] = v1; 164 } 165 166 } 167 168 } 169 170 bitreverse_permute(a, n); 171 } 172 173 174