Home | History | Annotate | Download | only in libmpdec
      1 /*
      2  * Copyright (c) 2008-2016 Stefan Krah. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  *
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  *
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
     16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 
     29 #include "mpdecimal.h"
     30 #include <stdio.h>
     31 #include "bits.h"
     32 #include "constants.h"
     33 #include "fnt.h"
     34 #include "fourstep.h"
     35 #include "numbertheory.h"
     36 #include "sixstep.h"
     37 #include "umodarith.h"
     38 #include "convolute.h"
     39 
     40 
     41 /* Bignum: Fast convolution using the Number Theoretic Transform. Used for
     42    the multiplication of very large coefficients. */
     43 
     44 
     45 /* Convolute the data in c1 and c2. Result is in c1. */
     46 int
     47 fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum)
     48 {
     49     int (*fnt)(mpd_uint_t *, mpd_size_t, int);
     50     int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
     51 #ifdef PPRO
     52     double dmod;
     53     uint32_t dinvmod[3];
     54 #endif
     55     mpd_uint_t n_inv, umod;
     56     mpd_size_t i;
     57 
     58 
     59     SETMODULUS(modnum);
     60     n_inv = POWMOD(n, (umod-2));
     61 
     62     if (ispower2(n)) {
     63         if (n > SIX_STEP_THRESHOLD) {
     64             fnt = six_step_fnt;
     65             inv_fnt = inv_six_step_fnt;
     66         }
     67         else {
     68             fnt = std_fnt;
     69             inv_fnt = std_inv_fnt;
     70         }
     71     }
     72     else {
     73         fnt = four_step_fnt;
     74         inv_fnt = inv_four_step_fnt;
     75     }
     76 
     77     if (!fnt(c1, n, modnum)) {
     78         return 0;
     79     }
     80     if (!fnt(c2, n, modnum)) {
     81         return 0;
     82     }
     83     for (i = 0; i < n-1; i += 2) {
     84         mpd_uint_t x0 = c1[i];
     85         mpd_uint_t y0 = c2[i];
     86         mpd_uint_t x1 = c1[i+1];
     87         mpd_uint_t y1 = c2[i+1];
     88         MULMOD2(&x0, y0, &x1, y1);
     89         c1[i] = x0;
     90         c1[i+1] = x1;
     91     }
     92 
     93     if (!inv_fnt(c1, n, modnum)) {
     94         return 0;
     95     }
     96     for (i = 0; i < n-3; i += 4) {
     97         mpd_uint_t x0 = c1[i];
     98         mpd_uint_t x1 = c1[i+1];
     99         mpd_uint_t x2 = c1[i+2];
    100         mpd_uint_t x3 = c1[i+3];
    101         MULMOD2C(&x0, &x1, n_inv);
    102         MULMOD2C(&x2, &x3, n_inv);
    103         c1[i] = x0;
    104         c1[i+1] = x1;
    105         c1[i+2] = x2;
    106         c1[i+3] = x3;
    107     }
    108 
    109     return 1;
    110 }
    111 
    112 /* Autoconvolute the data in c1. Result is in c1. */
    113 int
    114 fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum)
    115 {
    116     int (*fnt)(mpd_uint_t *, mpd_size_t, int);
    117     int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
    118 #ifdef PPRO
    119     double dmod;
    120     uint32_t dinvmod[3];
    121 #endif
    122     mpd_uint_t n_inv, umod;
    123     mpd_size_t i;
    124 
    125 
    126     SETMODULUS(modnum);
    127     n_inv = POWMOD(n, (umod-2));
    128 
    129     if (ispower2(n)) {
    130         if (n > SIX_STEP_THRESHOLD) {
    131             fnt = six_step_fnt;
    132             inv_fnt = inv_six_step_fnt;
    133         }
    134         else {
    135             fnt = std_fnt;
    136             inv_fnt = std_inv_fnt;
    137         }
    138     }
    139     else {
    140         fnt = four_step_fnt;
    141         inv_fnt = inv_four_step_fnt;
    142     }
    143 
    144     if (!fnt(c1, n, modnum)) {
    145         return 0;
    146     }
    147     for (i = 0; i < n-1; i += 2) {
    148         mpd_uint_t x0 = c1[i];
    149         mpd_uint_t x1 = c1[i+1];
    150         MULMOD2(&x0, x0, &x1, x1);
    151         c1[i] = x0;
    152         c1[i+1] = x1;
    153     }
    154 
    155     if (!inv_fnt(c1, n, modnum)) {
    156         return 0;
    157     }
    158     for (i = 0; i < n-3; i += 4) {
    159         mpd_uint_t x0 = c1[i];
    160         mpd_uint_t x1 = c1[i+1];
    161         mpd_uint_t x2 = c1[i+2];
    162         mpd_uint_t x3 = c1[i+3];
    163         MULMOD2C(&x0, &x1, n_inv);
    164         MULMOD2C(&x2, &x3, n_inv);
    165         c1[i] = x0;
    166         c1[i+1] = x1;
    167         c1[i+2] = x2;
    168         c1[i+3] = x3;
    169     }
    170 
    171     return 1;
    172 }
    173 
    174 
    175