1 /* 2 * Copyright (c) 2008-2016 Stefan Krah. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 29 #include "mpdecimal.h" 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <limits.h> 34 #include <assert.h> 35 #include "bits.h" 36 #include "constants.h" 37 #include "typearith.h" 38 #include "transpose.h" 39 40 41 #define BUFSIZE 4096 42 #define SIDE 128 43 44 45 /* Bignum: The transpose functions are used for very large transforms 46 in sixstep.c and fourstep.c. */ 47 48 49 /* Definition of the matrix transpose */ 50 void 51 std_trans(mpd_uint_t dest[], mpd_uint_t src[], mpd_size_t rows, mpd_size_t cols) 52 { 53 mpd_size_t idest, isrc; 54 mpd_size_t r, c; 55 56 for (r = 0; r < rows; r++) { 57 isrc = r * cols; 58 idest = r; 59 for (c = 0; c < cols; c++) { 60 dest[idest] = src[isrc]; 61 isrc += 1; 62 idest += rows; 63 } 64 } 65 } 66 67 /* 68 * Swap half-rows of 2^n * (2*2^n) matrix. 69 * FORWARD_CYCLE: even/odd permutation of the halfrows. 70 * BACKWARD_CYCLE: reverse the even/odd permutation. 71 */ 72 static int 73 swap_halfrows_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols, int dir) 74 { 75 mpd_uint_t buf1[BUFSIZE]; 76 mpd_uint_t buf2[BUFSIZE]; 77 mpd_uint_t *readbuf, *writebuf, *hp; 78 mpd_size_t *done, dbits; 79 mpd_size_t b = BUFSIZE, stride; 80 mpd_size_t hn, hmax; /* halfrow number */ 81 mpd_size_t m, r=0; 82 mpd_size_t offset; 83 mpd_size_t next; 84 85 86 assert(cols == mul_size_t(2, rows)); 87 88 if (dir == FORWARD_CYCLE) { 89 r = rows; 90 } 91 else if (dir == BACKWARD_CYCLE) { 92 r = 2; 93 } 94 else { 95 abort(); /* GCOV_NOT_REACHED */ 96 } 97 98 m = cols - 1; 99 hmax = rows; /* cycles start at odd halfrows */ 100 dbits = 8 * sizeof *done; 101 if ((done = mpd_calloc(hmax/(sizeof *done) + 1, sizeof *done)) == NULL) { 102 return 0; 103 } 104 105 for (hn = 1; hn <= hmax; hn += 2) { 106 107 if (done[hn/dbits] & mpd_bits[hn%dbits]) { 108 continue; 109 } 110 111 readbuf = buf1; writebuf = buf2; 112 113 for (offset = 0; offset < cols/2; offset += b) { 114 115 stride = (offset + b < cols/2) ? b : cols/2-offset; 116 117 hp = matrix + hn*cols/2; 118 memcpy(readbuf, hp+offset, stride*(sizeof *readbuf)); 119 pointerswap(&readbuf, &writebuf); 120 121 next = mulmod_size_t(hn, r, m); 122 hp = matrix + next*cols/2; 123 124 while (next != hn) { 125 126 memcpy(readbuf, hp+offset, stride*(sizeof *readbuf)); 127 memcpy(hp+offset, writebuf, stride*(sizeof *writebuf)); 128 pointerswap(&readbuf, &writebuf); 129 130 done[next/dbits] |= mpd_bits[next%dbits]; 131 132 next = mulmod_size_t(next, r, m); 133 hp = matrix + next*cols/2; 134 135 } 136 137 memcpy(hp+offset, writebuf, stride*(sizeof *writebuf)); 138 139 done[hn/dbits] |= mpd_bits[hn%dbits]; 140 } 141 } 142 143 mpd_free(done); 144 return 1; 145 } 146 147 /* In-place transpose of a square matrix */ 148 static inline void 149 squaretrans(mpd_uint_t *buf, mpd_size_t cols) 150 { 151 mpd_uint_t tmp; 152 mpd_size_t idest, isrc; 153 mpd_size_t r, c; 154 155 for (r = 0; r < cols; r++) { 156 c = r+1; 157 isrc = r*cols + c; 158 idest = c*cols + r; 159 for (c = r+1; c < cols; c++) { 160 tmp = buf[isrc]; 161 buf[isrc] = buf[idest]; 162 buf[idest] = tmp; 163 isrc += 1; 164 idest += cols; 165 } 166 } 167 } 168 169 /* 170 * Transpose 2^n * 2^n matrix. For cache efficiency, the matrix is split into 171 * square blocks with side length 'SIDE'. First, the blocks are transposed, 172 * then a square transposition is done on each individual block. 173 */ 174 static void 175 squaretrans_pow2(mpd_uint_t *matrix, mpd_size_t size) 176 { 177 mpd_uint_t buf1[SIDE*SIDE]; 178 mpd_uint_t buf2[SIDE*SIDE]; 179 mpd_uint_t *to, *from; 180 mpd_size_t b = size; 181 mpd_size_t r, c; 182 mpd_size_t i; 183 184 while (b > SIDE) b >>= 1; 185 186 for (r = 0; r < size; r += b) { 187 188 for (c = r; c < size; c += b) { 189 190 from = matrix + r*size + c; 191 to = buf1; 192 for (i = 0; i < b; i++) { 193 memcpy(to, from, b*(sizeof *to)); 194 from += size; 195 to += b; 196 } 197 squaretrans(buf1, b); 198 199 if (r == c) { 200 to = matrix + r*size + c; 201 from = buf1; 202 for (i = 0; i < b; i++) { 203 memcpy(to, from, b*(sizeof *to)); 204 from += b; 205 to += size; 206 } 207 continue; 208 } 209 else { 210 from = matrix + c*size + r; 211 to = buf2; 212 for (i = 0; i < b; i++) { 213 memcpy(to, from, b*(sizeof *to)); 214 from += size; 215 to += b; 216 } 217 squaretrans(buf2, b); 218 219 to = matrix + c*size + r; 220 from = buf1; 221 for (i = 0; i < b; i++) { 222 memcpy(to, from, b*(sizeof *to)); 223 from += b; 224 to += size; 225 } 226 227 to = matrix + r*size + c; 228 from = buf2; 229 for (i = 0; i < b; i++) { 230 memcpy(to, from, b*(sizeof *to)); 231 from += b; 232 to += size; 233 } 234 } 235 } 236 } 237 238 } 239 240 /* 241 * In-place transposition of a 2^n x 2^n or a 2^n x (2*2^n) 242 * or a (2*2^n) x 2^n matrix. 243 */ 244 int 245 transpose_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols) 246 { 247 mpd_size_t size = mul_size_t(rows, cols); 248 249 assert(ispower2(rows)); 250 assert(ispower2(cols)); 251 252 if (cols == rows) { 253 squaretrans_pow2(matrix, rows); 254 } 255 else if (cols == mul_size_t(2, rows)) { 256 if (!swap_halfrows_pow2(matrix, rows, cols, FORWARD_CYCLE)) { 257 return 0; 258 } 259 squaretrans_pow2(matrix, rows); 260 squaretrans_pow2(matrix+(size/2), rows); 261 } 262 else if (rows == mul_size_t(2, cols)) { 263 squaretrans_pow2(matrix, cols); 264 squaretrans_pow2(matrix+(size/2), cols); 265 if (!swap_halfrows_pow2(matrix, cols, rows, BACKWARD_CYCLE)) { 266 return 0; 267 } 268 } 269 else { 270 abort(); /* GCOV_NOT_REACHED */ 271 } 272 273 return 1; 274 } 275 276 277