1 /** Compute the matrix inverse via Gauss-Jordan elimination. 2 * This program uses only barriers to separate computation steps but no 3 * mutexes. It is an example of a race-free program on which no data races 4 * are reported by the happens-before algorithm (drd), but a lot of data races 5 * (all false positives) are reported by the Eraser-algorithm (helgrind). 6 */ 7 8 9 #define _GNU_SOURCE 10 11 /***********************/ 12 /* Include directives. */ 13 /***********************/ 14 15 #include <assert.h> 16 #include <math.h> 17 #include <limits.h> // PTHREAD_STACK_MIN 18 #include <pthread.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <unistd.h> // getopt() 22 23 24 /*********************/ 25 /* Type definitions. */ 26 /*********************/ 27 28 typedef double elem_t; 29 30 struct gj_threadinfo 31 { 32 pthread_barrier_t* b; 33 pthread_t tid; 34 elem_t* a; 35 int rows; 36 int cols; 37 int r0; 38 int r1; 39 }; 40 41 42 /********************/ 43 /* Local variables. */ 44 /********************/ 45 46 static int s_nthread = 1; 47 48 49 /*************************/ 50 /* Function definitions. */ 51 /*************************/ 52 53 /** Allocate memory for a matrix with the specified number of rows and 54 * columns. 55 */ 56 static elem_t* new_matrix(const int rows, const int cols) 57 { 58 assert(rows > 0); 59 assert(cols > 0); 60 return malloc(rows * cols * sizeof(elem_t)); 61 } 62 63 /** Free the memory that was allocated for a matrix. */ 64 static void delete_matrix(elem_t* const a) 65 { 66 free(a); 67 } 68 69 /** Fill in some numbers in a matrix. */ 70 static void init_matrix(elem_t* const a, const int rows, const int cols) 71 { 72 int i, j; 73 for (i = 0; i < rows; i++) 74 { 75 for (j = 0; j < rows; j++) 76 { 77 a[i * cols + j] = 1.0 / (1 + abs(i-j)); 78 } 79 } 80 } 81 82 /** Print all elements of a matrix. */ 83 void print_matrix(const char* const label, 84 const elem_t* const a, const int rows, const int cols) 85 { 86 int i, j; 87 printf("%s:\n", label); 88 for (i = 0; i < rows; i++) 89 { 90 for (j = 0; j < cols; j++) 91 { 92 printf("%g ", a[i * cols + j]); 93 } 94 printf("\n"); 95 } 96 } 97 98 /** Copy a subset of the elements of a matrix into another matrix. */ 99 static void copy_matrix(const elem_t* const from, 100 const int from_rows, 101 const int from_cols, 102 const int from_row_first, 103 const int from_row_last, 104 const int from_col_first, 105 const int from_col_last, 106 elem_t* const to, 107 const int to_rows, 108 const int to_cols, 109 const int to_row_first, 110 const int to_row_last, 111 const int to_col_first, 112 const int to_col_last) 113 { 114 int i, j; 115 116 assert(from_row_last - from_row_first == to_row_last - to_row_first); 117 assert(from_col_last - from_col_first == to_col_last - to_col_first); 118 119 for (i = from_row_first; i < from_row_last; i++) 120 { 121 assert(i < from_rows); 122 assert(i - from_row_first + to_row_first < to_rows); 123 for (j = from_col_first; j < from_col_last; j++) 124 { 125 assert(j < from_cols); 126 assert(j - from_col_first + to_col_first < to_cols); 127 to[(i - from_row_first + to_col_first) * to_cols 128 + (j - from_col_first + to_col_first)] 129 = from[i * from_cols + j]; 130 } 131 } 132 } 133 134 /** Compute the matrix product of a1 and a2. */ 135 static elem_t* multiply_matrices(const elem_t* const a1, 136 const int rows1, 137 const int cols1, 138 const elem_t* const a2, 139 const int rows2, 140 const int cols2) 141 { 142 int i, j, k; 143 elem_t* prod; 144 145 assert(cols1 == rows2); 146 147 prod = new_matrix(rows1, cols2); 148 for (i = 0; i < rows1; i++) 149 { 150 for (j = 0; j < cols2; j++) 151 { 152 prod[i * cols2 + j] = 0; 153 for (k = 0; k < cols1; k++) 154 { 155 prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j]; 156 } 157 } 158 } 159 return prod; 160 } 161 162 /** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting 163 * at row r0 and up to but not including row r1. It is assumed that as many 164 * threads execute this function concurrently as the count barrier p->b was 165 * initialized with. If the matrix p->a is nonsingular, and if matrix p->a 166 * has at least as many columns as rows, the result of this algorithm is that 167 * submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix. 168 * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination 169 */ 170 static void gj_threadfunc(struct gj_threadinfo* p) 171 { 172 int i, j, k; 173 elem_t* const a = p->a; 174 const int rows = p->rows; 175 const int cols = p->cols; 176 177 for (i = 0; i < p->rows; i++) 178 { 179 if (pthread_barrier_wait(p->b) == PTHREAD_BARRIER_SERIAL_THREAD) 180 { 181 // Pivoting. 182 j = i; 183 for (k = i + 1; k < rows; k++) 184 { 185 if (a[k * cols + i] > a[j * cols + i]) 186 { 187 j = k; 188 } 189 } 190 if (j != i) 191 { 192 for (k = 0; k < cols; k++) 193 { 194 const elem_t t = a[i * cols + k]; 195 a[i * cols + k] = a[j * cols + k]; 196 a[j * cols + k] = t; 197 } 198 } 199 // Normalize row i. 200 if (a[i * cols + i] != 0) 201 { 202 for (k = cols - 1; k >= 0; k--) 203 { 204 a[i * cols + k] /= a[i * cols + i]; 205 } 206 } 207 } 208 pthread_barrier_wait(p->b); 209 // Reduce all rows j != i. 210 for (j = p->r0; j < p->r1; j++) 211 { 212 if (i != j) 213 { 214 const elem_t factor = a[j * cols + i]; 215 for (k = 0; k < cols; k++) 216 { 217 a[j * cols + k] -= a[i * cols + k] * factor; 218 } 219 } 220 } 221 } 222 } 223 224 /** Multithreaded Gauss-Jordan algorithm. */ 225 static void gj(elem_t* const a, const int rows, const int cols) 226 { 227 int i; 228 struct gj_threadinfo* t; 229 pthread_barrier_t b; 230 pthread_attr_t attr; 231 int err; 232 233 assert(rows <= cols); 234 235 t = malloc(sizeof(struct gj_threadinfo) * s_nthread); 236 237 pthread_barrier_init(&b, 0, s_nthread); 238 239 pthread_attr_init(&attr); 240 err = pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN + 4096); 241 assert(err == 0); 242 243 for (i = 0; i < s_nthread; i++) 244 { 245 t[i].b = &b; 246 t[i].a = a; 247 t[i].rows = rows; 248 t[i].cols = cols; 249 t[i].r0 = i * rows / s_nthread; 250 t[i].r1 = (i+1) * rows / s_nthread; 251 pthread_create(&t[i].tid, &attr, (void*(*)(void*))gj_threadfunc, &t[i]); 252 } 253 254 pthread_attr_destroy(&attr); 255 256 for (i = 0; i < s_nthread; i++) 257 { 258 pthread_join(t[i].tid, 0); 259 } 260 261 pthread_barrier_destroy(&b); 262 263 free(t); 264 } 265 266 /** Matrix inversion via the Gauss-Jordan algorithm. */ 267 static elem_t* invert_matrix(const elem_t* const a, const int n) 268 { 269 int i, j; 270 elem_t* const inv = new_matrix(n, n); 271 elem_t* const tmp = new_matrix(n, 2*n); 272 copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n); 273 for (i = 0; i < n; i++) 274 for (j = 0; j < n; j++) 275 tmp[i * 2 * n + n + j] = (i == j); 276 gj(tmp, n, 2*n); 277 copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n); 278 delete_matrix(tmp); 279 return inv; 280 } 281 282 /** Compute the average square error between the identity matrix and the 283 * product of matrix a with its inverse matrix. 284 */ 285 static double identity_error(const elem_t* const a, const int n) 286 { 287 int i, j; 288 elem_t e = 0; 289 for (i = 0; i < n; i++) 290 { 291 for (j = 0; j < n; j++) 292 { 293 const elem_t d = a[i * n + j] - (i == j); 294 e += d * d; 295 } 296 } 297 return sqrt(e / (n * n)); 298 } 299 300 /** Compute epsilon for the numeric type elem_t. Epsilon is defined as the 301 * smallest number for which the sum of one and that number is different of 302 * one. It is assumed that the underlying representation of elem_t uses 303 * base two. 304 */ 305 static elem_t epsilon() 306 { 307 elem_t eps; 308 for (eps = 1; 1 + eps != 1; eps /= 2) 309 ; 310 return 2 * eps; 311 } 312 313 int main(int argc, char** argv) 314 { 315 int matrix_size; 316 int silent = 0; 317 int optchar; 318 elem_t *a, *inv, *prod; 319 elem_t eps; 320 double error; 321 double ratio; 322 323 while ((optchar = getopt(argc, argv, "qt:")) != EOF) 324 { 325 switch (optchar) 326 { 327 case 'q': silent = 1; break; 328 case 't': s_nthread = atoi(optarg); break; 329 default: 330 fprintf(stderr, "Error: unknown option '%c'.\n", optchar); 331 return 1; 332 } 333 } 334 335 if (optind + 1 != argc) 336 { 337 fprintf(stderr, "Error: wrong number of arguments.\n"); 338 } 339 matrix_size = atoi(argv[optind]); 340 341 /* Error checking. */ 342 assert(matrix_size >= 1); 343 assert(s_nthread >= 1); 344 345 eps = epsilon(); 346 a = new_matrix(matrix_size, matrix_size); 347 init_matrix(a, matrix_size, matrix_size); 348 inv = invert_matrix(a, matrix_size); 349 prod = multiply_matrices(a, matrix_size, matrix_size, 350 inv, matrix_size, matrix_size); 351 error = identity_error(prod, matrix_size); 352 ratio = error / (eps * matrix_size); 353 if (! silent) 354 { 355 printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n", 356 error, eps, ratio); 357 } 358 if (isfinite(ratio) && ratio < 100) 359 printf("Error within bounds.\n"); 360 else 361 printf("Error out of bounds.\n"); 362 delete_matrix(prod); 363 delete_matrix(inv); 364 delete_matrix(a); 365 366 return 0; 367 } 368