1 /** Compute the matrix inverse via Gauss-Jordan elimination. 2 * This program uses only barriers to separate computation steps but no 3 * mutexes. It is an example of a race-free program on which no data races 4 * are reported by the happens-before algorithm (drd), but a lot of data races 5 * (all false positives) are reported by the Eraser-algorithm (helgrind). 6 */ 7 8 9 #define _GNU_SOURCE 10 11 /***********************/ 12 /* Include directives. */ 13 /***********************/ 14 15 #include <assert.h> 16 #include <math.h> 17 #include <limits.h> // PTHREAD_STACK_MIN 18 #include <pthread.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <unistd.h> // getopt() 22 23 24 /*********************/ 25 /* Type definitions. */ 26 /*********************/ 27 28 typedef double elem_t; 29 30 struct gj_threadinfo 31 { 32 pthread_barrier_t* b; 33 pthread_t tid; 34 elem_t* a; 35 int rows; 36 int cols; 37 int r0; 38 int r1; 39 }; 40 41 42 /********************/ 43 /* Local variables. */ 44 /********************/ 45 46 static int s_nthread = 1; 47 48 49 /*************************/ 50 /* Function definitions. */ 51 /*************************/ 52 53 /** Allocate memory for a matrix with the specified number of rows and 54 * columns. 55 */ 56 static elem_t* new_matrix(const int rows, const int cols) 57 { 58 assert(rows > 0); 59 assert(cols > 0); 60 return malloc(rows * cols * sizeof(elem_t)); 61 } 62 63 /** Free the memory that was allocated for a matrix. */ 64 static void delete_matrix(elem_t* const a) 65 { 66 free(a); 67 } 68 69 /** Fill in some numbers in a matrix. */ 70 static void init_matrix(elem_t* const a, const int rows, const int cols) 71 { 72 int i, j; 73 for (i = 0; i < rows; i++) 74 { 75 for (j = 0; j < rows; j++) 76 { 77 a[i * cols + j] = 1.0 / (1 + abs(i-j)); 78 } 79 } 80 } 81 82 /** Print all elements of a matrix. */ 83 void print_matrix(const char* const label, 84 const elem_t* const a, const int rows, const int cols) 85 { 86 int i, j; 87 printf("%s:\n", label); 88 for (i = 0; i < rows; i++) 89 { 90 for (j = 0; j < cols; j++) 91 { 92 printf("%g ", a[i * cols + j]); 93 } 94 printf("\n"); 95 } 96 } 97 98 /** Copy a subset of the elements of a matrix into another matrix. */ 99 static void copy_matrix(const elem_t* const from, 100 const int from_rows, 101 const int from_cols, 102 const int from_row_first, 103 const int from_row_last, 104 const int from_col_first, 105 const int from_col_last, 106 elem_t* const to, 107 const int to_rows, 108 const int to_cols, 109 const int to_row_first, 110 const int to_row_last, 111 const int to_col_first, 112 const int to_col_last) 113 { 114 int i, j; 115 116 assert(from_row_last - from_row_first == to_row_last - to_row_first); 117 assert(from_col_last - from_col_first == to_col_last - to_col_first); 118 119 for (i = from_row_first; i < from_row_last; i++) 120 { 121 assert(i < from_rows); 122 assert(i - from_row_first + to_row_first < to_rows); 123 for (j = from_col_first; j < from_col_last; j++) 124 { 125 assert(j < from_cols); 126 assert(j - from_col_first + to_col_first < to_cols); 127 to[(i - from_row_first + to_col_first) * to_cols 128 + (j - from_col_first + to_col_first)] 129 = from[i * from_cols + j]; 130 } 131 } 132 } 133 134 /** Compute the matrix product of a1 and a2. */ 135 static elem_t* multiply_matrices(const elem_t* const a1, 136 const int rows1, 137 const int cols1, 138 const elem_t* const a2, 139 const int rows2, 140 const int cols2) 141 { 142 int i, j, k; 143 elem_t* prod; 144 145 assert(cols1 == rows2); 146 147 prod = new_matrix(rows1, cols2); 148 for (i = 0; i < rows1; i++) 149 { 150 for (j = 0; j < cols2; j++) 151 { 152 prod[i * cols2 + j] = 0; 153 for (k = 0; k < cols1; k++) 154 { 155 prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j]; 156 } 157 } 158 } 159 return prod; 160 } 161 162 /** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting 163 * at row r0 and up to but not including row r1. It is assumed that as many 164 * threads execute this function concurrently as the count barrier p->b was 165 * initialized with. If the matrix p->a is nonsingular, and if matrix p->a 166 * has at least as many columns as rows, the result of this algorithm is that 167 * submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix. 168 * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination 169 */ 170 static void gj_threadfunc(struct gj_threadinfo* p) 171 { 172 int i, j, k; 173 elem_t* const a = p->a; 174 const int rows = p->rows; 175 const int cols = p->cols; 176 elem_t aii; 177 178 for (i = 0; i < p->rows; i++) 179 { 180 if (pthread_barrier_wait(p->b) == PTHREAD_BARRIER_SERIAL_THREAD) 181 { 182 // Pivoting. 183 j = i; 184 for (k = i + 1; k < rows; k++) 185 { 186 if (a[k * cols + i] > a[j * cols + i]) 187 { 188 j = k; 189 } 190 } 191 if (j != i) 192 { 193 for (k = 0; k < cols; k++) 194 { 195 const elem_t t = a[i * cols + k]; 196 a[i * cols + k] = a[j * cols + k]; 197 a[j * cols + k] = t; 198 } 199 } 200 // Normalize row i. 201 aii = a[i * cols + i]; 202 if (aii != 0) 203 for (k = i; k < cols; k++) 204 a[i * cols + k] /= aii; 205 } 206 pthread_barrier_wait(p->b); 207 // Reduce all rows j != i. 208 for (j = p->r0; j < p->r1; j++) 209 { 210 if (i != j) 211 { 212 const elem_t factor = a[j * cols + i]; 213 for (k = 0; k < cols; k++) 214 { 215 a[j * cols + k] -= a[i * cols + k] * factor; 216 } 217 } 218 } 219 } 220 } 221 222 /** Multithreaded Gauss-Jordan algorithm. */ 223 static void gj(elem_t* const a, const int rows, const int cols) 224 { 225 int i; 226 struct gj_threadinfo* t; 227 pthread_barrier_t b; 228 pthread_attr_t attr; 229 int err; 230 231 assert(rows <= cols); 232 233 t = malloc(sizeof(struct gj_threadinfo) * s_nthread); 234 235 pthread_barrier_init(&b, 0, s_nthread); 236 237 pthread_attr_init(&attr); 238 err = pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN + 4096); 239 assert(err == 0); 240 241 for (i = 0; i < s_nthread; i++) 242 { 243 t[i].b = &b; 244 t[i].a = a; 245 t[i].rows = rows; 246 t[i].cols = cols; 247 t[i].r0 = i * rows / s_nthread; 248 t[i].r1 = (i+1) * rows / s_nthread; 249 pthread_create(&t[i].tid, &attr, (void*(*)(void*))gj_threadfunc, &t[i]); 250 } 251 252 pthread_attr_destroy(&attr); 253 254 for (i = 0; i < s_nthread; i++) 255 { 256 pthread_join(t[i].tid, 0); 257 } 258 259 pthread_barrier_destroy(&b); 260 261 free(t); 262 } 263 264 /** Matrix inversion via the Gauss-Jordan algorithm. */ 265 static elem_t* invert_matrix(const elem_t* const a, const int n) 266 { 267 int i, j; 268 elem_t* const inv = new_matrix(n, n); 269 elem_t* const tmp = new_matrix(n, 2*n); 270 copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n); 271 for (i = 0; i < n; i++) 272 for (j = 0; j < n; j++) 273 tmp[i * 2 * n + n + j] = (i == j); 274 gj(tmp, n, 2*n); 275 copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n); 276 delete_matrix(tmp); 277 return inv; 278 } 279 280 /** Compute the average square error between the identity matrix and the 281 * product of matrix a with its inverse matrix. 282 */ 283 static double identity_error(const elem_t* const a, const int n) 284 { 285 int i, j; 286 elem_t e = 0; 287 for (i = 0; i < n; i++) 288 { 289 for (j = 0; j < n; j++) 290 { 291 const elem_t d = a[i * n + j] - (i == j); 292 e += d * d; 293 } 294 } 295 return sqrt(e / (n * n)); 296 } 297 298 /** Compute epsilon for the numeric type elem_t. Epsilon is defined as the 299 * smallest number for which the sum of one and that number is different of 300 * one. It is assumed that the underlying representation of elem_t uses 301 * base two. 302 */ 303 static elem_t epsilon() 304 { 305 elem_t eps; 306 for (eps = 1; 1 + eps != 1; eps /= 2) 307 ; 308 return 2 * eps; 309 } 310 311 int main(int argc, char** argv) 312 { 313 int matrix_size; 314 int silent = 0; 315 int optchar; 316 elem_t *a, *inv, *prod; 317 elem_t eps; 318 double error; 319 double ratio; 320 321 while ((optchar = getopt(argc, argv, "qt:")) != EOF) 322 { 323 switch (optchar) 324 { 325 case 'q': silent = 1; break; 326 case 't': s_nthread = atoi(optarg); break; 327 default: 328 fprintf(stderr, "Error: unknown option '%c'.\n", optchar); 329 return 1; 330 } 331 } 332 333 if (optind + 1 != argc) 334 { 335 fprintf(stderr, "Error: wrong number of arguments.\n"); 336 return 1; 337 } 338 matrix_size = atoi(argv[optind]); 339 340 /* Error checking. */ 341 assert(matrix_size >= 1); 342 assert(s_nthread >= 1); 343 344 eps = epsilon(); 345 a = new_matrix(matrix_size, matrix_size); 346 init_matrix(a, matrix_size, matrix_size); 347 inv = invert_matrix(a, matrix_size); 348 prod = multiply_matrices(a, matrix_size, matrix_size, 349 inv, matrix_size, matrix_size); 350 error = identity_error(prod, matrix_size); 351 ratio = error / (eps * matrix_size); 352 if (! silent) 353 { 354 printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n", 355 error, eps, ratio); 356 } 357 if (isfinite(ratio) && ratio < 100) 358 printf("Error within bounds.\n"); 359 else 360 printf("Error out of bounds.\n"); 361 delete_matrix(prod); 362 delete_matrix(inv); 363 delete_matrix(a); 364 365 return 0; 366 } 367