Home | History | Annotate | Download | only in libmpdec
      1 /*
      2  * Copyright (c) 2008-2016 Stefan Krah. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  *
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  *
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
     16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 
     29 #include "mpdecimal.h"
     30 #include <stdio.h>
     31 #include <stdlib.h>
     32 #include <string.h>
     33 #include <limits.h>
     34 #include <assert.h>
     35 #include "bits.h"
     36 #include "constants.h"
     37 #include "typearith.h"
     38 #include "transpose.h"
     39 
     40 
     41 #define BUFSIZE 4096
     42 #define SIDE 128
     43 
     44 
     45 /* Bignum: The transpose functions are used for very large transforms
     46    in sixstep.c and fourstep.c. */
     47 
     48 
     49 /* Definition of the matrix transpose */
     50 void
     51 std_trans(mpd_uint_t dest[], mpd_uint_t src[], mpd_size_t rows, mpd_size_t cols)
     52 {
     53     mpd_size_t idest, isrc;
     54     mpd_size_t r, c;
     55 
     56     for (r = 0; r < rows; r++) {
     57         isrc = r * cols;
     58         idest = r;
     59         for (c = 0; c < cols; c++) {
     60             dest[idest] = src[isrc];
     61             isrc += 1;
     62             idest += rows;
     63         }
     64     }
     65 }
     66 
     67 /*
     68  * Swap half-rows of 2^n * (2*2^n) matrix.
     69  * FORWARD_CYCLE: even/odd permutation of the halfrows.
     70  * BACKWARD_CYCLE: reverse the even/odd permutation.
     71  */
     72 static int
     73 swap_halfrows_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols, int dir)
     74 {
     75     mpd_uint_t buf1[BUFSIZE];
     76     mpd_uint_t buf2[BUFSIZE];
     77     mpd_uint_t *readbuf, *writebuf, *hp;
     78     mpd_size_t *done, dbits;
     79     mpd_size_t b = BUFSIZE, stride;
     80     mpd_size_t hn, hmax; /* halfrow number */
     81     mpd_size_t m, r=0;
     82     mpd_size_t offset;
     83     mpd_size_t next;
     84 
     85 
     86     assert(cols == mul_size_t(2, rows));
     87 
     88     if (dir == FORWARD_CYCLE) {
     89         r = rows;
     90     }
     91     else if (dir == BACKWARD_CYCLE) {
     92         r = 2;
     93     }
     94     else {
     95         abort(); /* GCOV_NOT_REACHED */
     96     }
     97 
     98     m = cols - 1;
     99     hmax = rows; /* cycles start at odd halfrows */
    100     dbits = 8 * sizeof *done;
    101     if ((done = mpd_calloc(hmax/(sizeof *done) + 1, sizeof *done)) == NULL) {
    102         return 0;
    103     }
    104 
    105     for (hn = 1; hn <= hmax; hn += 2) {
    106 
    107         if (done[hn/dbits] & mpd_bits[hn%dbits]) {
    108             continue;
    109         }
    110 
    111         readbuf = buf1; writebuf = buf2;
    112 
    113         for (offset = 0; offset < cols/2; offset += b) {
    114 
    115             stride = (offset + b < cols/2) ? b : cols/2-offset;
    116 
    117             hp = matrix + hn*cols/2;
    118             memcpy(readbuf, hp+offset, stride*(sizeof *readbuf));
    119             pointerswap(&readbuf, &writebuf);
    120 
    121             next = mulmod_size_t(hn, r, m);
    122             hp = matrix + next*cols/2;
    123 
    124             while (next != hn) {
    125 
    126                 memcpy(readbuf, hp+offset, stride*(sizeof *readbuf));
    127                 memcpy(hp+offset, writebuf, stride*(sizeof *writebuf));
    128                 pointerswap(&readbuf, &writebuf);
    129 
    130                 done[next/dbits] |= mpd_bits[next%dbits];
    131 
    132                 next = mulmod_size_t(next, r, m);
    133                     hp = matrix + next*cols/2;
    134 
    135             }
    136 
    137             memcpy(hp+offset, writebuf, stride*(sizeof *writebuf));
    138 
    139             done[hn/dbits] |= mpd_bits[hn%dbits];
    140         }
    141     }
    142 
    143     mpd_free(done);
    144     return 1;
    145 }
    146 
    147 /* In-place transpose of a square matrix */
    148 static inline void
    149 squaretrans(mpd_uint_t *buf, mpd_size_t cols)
    150 {
    151     mpd_uint_t tmp;
    152     mpd_size_t idest, isrc;
    153     mpd_size_t r, c;
    154 
    155     for (r = 0; r < cols; r++) {
    156         c = r+1;
    157         isrc = r*cols + c;
    158         idest = c*cols + r;
    159         for (c = r+1; c < cols; c++) {
    160             tmp = buf[isrc];
    161             buf[isrc] = buf[idest];
    162             buf[idest] = tmp;
    163             isrc += 1;
    164             idest += cols;
    165         }
    166     }
    167 }
    168 
    169 /*
    170  * Transpose 2^n * 2^n matrix. For cache efficiency, the matrix is split into
    171  * square blocks with side length 'SIDE'. First, the blocks are transposed,
    172  * then a square transposition is done on each individual block.
    173  */
    174 static void
    175 squaretrans_pow2(mpd_uint_t *matrix, mpd_size_t size)
    176 {
    177     mpd_uint_t buf1[SIDE*SIDE];
    178     mpd_uint_t buf2[SIDE*SIDE];
    179     mpd_uint_t *to, *from;
    180     mpd_size_t b = size;
    181     mpd_size_t r, c;
    182     mpd_size_t i;
    183 
    184     while (b > SIDE) b >>= 1;
    185 
    186     for (r = 0; r < size; r += b) {
    187 
    188         for (c = r; c < size; c += b) {
    189 
    190             from = matrix + r*size + c;
    191             to = buf1;
    192             for (i = 0; i < b; i++) {
    193                 memcpy(to, from, b*(sizeof *to));
    194                 from += size;
    195                 to += b;
    196             }
    197             squaretrans(buf1, b);
    198 
    199             if (r == c) {
    200                 to = matrix + r*size + c;
    201                 from = buf1;
    202                 for (i = 0; i < b; i++) {
    203                     memcpy(to, from, b*(sizeof *to));
    204                     from += b;
    205                     to += size;
    206                 }
    207                 continue;
    208             }
    209             else {
    210                 from = matrix + c*size + r;
    211                 to = buf2;
    212                 for (i = 0; i < b; i++) {
    213                     memcpy(to, from, b*(sizeof *to));
    214                     from += size;
    215                     to += b;
    216                 }
    217                 squaretrans(buf2, b);
    218 
    219                 to = matrix + c*size + r;
    220                 from = buf1;
    221                 for (i = 0; i < b; i++) {
    222                     memcpy(to, from, b*(sizeof *to));
    223                     from += b;
    224                     to += size;
    225                 }
    226 
    227                 to = matrix + r*size + c;
    228                 from = buf2;
    229                 for (i = 0; i < b; i++) {
    230                     memcpy(to, from, b*(sizeof *to));
    231                     from += b;
    232                     to += size;
    233                 }
    234             }
    235         }
    236     }
    237 
    238 }
    239 
    240 /*
    241  * In-place transposition of a 2^n x 2^n or a 2^n x (2*2^n)
    242  * or a (2*2^n) x 2^n matrix.
    243  */
    244 int
    245 transpose_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols)
    246 {
    247     mpd_size_t size = mul_size_t(rows, cols);
    248 
    249     assert(ispower2(rows));
    250     assert(ispower2(cols));
    251 
    252     if (cols == rows) {
    253         squaretrans_pow2(matrix, rows);
    254     }
    255     else if (cols == mul_size_t(2, rows)) {
    256         if (!swap_halfrows_pow2(matrix, rows, cols, FORWARD_CYCLE)) {
    257             return 0;
    258         }
    259         squaretrans_pow2(matrix, rows);
    260         squaretrans_pow2(matrix+(size/2), rows);
    261     }
    262     else if (rows == mul_size_t(2, cols)) {
    263         squaretrans_pow2(matrix, cols);
    264         squaretrans_pow2(matrix+(size/2), cols);
    265         if (!swap_halfrows_pow2(matrix, cols, rows, BACKWARD_CYCLE)) {
    266             return 0;
    267         }
    268     }
    269     else {
    270         abort(); /* GCOV_NOT_REACHED */
    271     }
    272 
    273     return 1;
    274 }
    275 
    276 
    277