Home | History | Annotate | Download | only in tests
      1 /** Compute the matrix inverse via Gauss-Jordan elimination.
      2  *  This program uses only barriers to separate computation steps but no
      3  *  mutexes. It is an example of a race-free program on which no data races
      4  *  are reported by the happens-before algorithm (drd), but a lot of data races
      5  *  (all false positives) are reported by the Eraser-algorithm (helgrind).
      6  */
      7 
      8 
      9 #define _GNU_SOURCE
     10 
     11 /***********************/
     12 /* Include directives. */
     13 /***********************/
     14 
     15 #include <assert.h>
     16 #include <math.h>
     17 #include <limits.h>  // PTHREAD_STACK_MIN
     18 #include <pthread.h>
     19 #include <stdio.h>
     20 #include <stdlib.h>
     21 #include <unistd.h>  // getopt()
     22 
     23 
     24 /*********************/
     25 /* Type definitions. */
     26 /*********************/
     27 
     28 typedef double elem_t;
     29 
     30 struct gj_threadinfo
     31 {
     32   pthread_barrier_t* b;
     33   pthread_t tid;
     34   elem_t* a;
     35   int rows;
     36   int cols;
     37   int r0;
     38   int r1;
     39 };
     40 
     41 
     42 /********************/
     43 /* Local variables. */
     44 /********************/
     45 
     46 static int s_nthread = 1;
     47 
     48 
     49 /*************************/
     50 /* Function definitions. */
     51 /*************************/
     52 
     53 /** Allocate memory for a matrix with the specified number of rows and
     54  *  columns.
     55  */
     56 static elem_t* new_matrix(const int rows, const int cols)
     57 {
     58   assert(rows > 0);
     59   assert(cols > 0);
     60   return malloc(rows * cols * sizeof(elem_t));
     61 }
     62 
     63 /** Free the memory that was allocated for a matrix. */
     64 static void delete_matrix(elem_t* const a)
     65 {
     66   free(a);
     67 }
     68 
     69 /** Fill in some numbers in a matrix. */
     70 static void init_matrix(elem_t* const a, const int rows, const int cols)
     71 {
     72   int i, j;
     73   for (i = 0; i < rows; i++)
     74   {
     75     for (j = 0; j < rows; j++)
     76     {
     77       a[i * cols + j] = 1.0 / (1 + abs(i-j));
     78     }
     79   }
     80 }
     81 
     82 /** Print all elements of a matrix. */
     83 void print_matrix(const char* const label,
     84                   const elem_t* const a, const int rows, const int cols)
     85 {
     86   int i, j;
     87   printf("%s:\n", label);
     88   for (i = 0; i < rows; i++)
     89   {
     90     for (j = 0; j < cols; j++)
     91     {
     92       printf("%g ", a[i * cols + j]);
     93     }
     94     printf("\n");
     95   }
     96 }
     97 
     98 /** Copy a subset of the elements of a matrix into another matrix. */
     99 static void copy_matrix(const elem_t* const from,
    100                         const int from_rows,
    101                         const int from_cols,
    102                         const int from_row_first,
    103                         const int from_row_last,
    104                         const int from_col_first,
    105                         const int from_col_last,
    106                         elem_t* const to,
    107                         const int to_rows,
    108                         const int to_cols,
    109                         const int to_row_first,
    110                         const int to_row_last,
    111                         const int to_col_first,
    112                         const int to_col_last)
    113 {
    114   int i, j;
    115 
    116   assert(from_row_last - from_row_first == to_row_last - to_row_first);
    117   assert(from_col_last - from_col_first == to_col_last - to_col_first);
    118 
    119   for (i = from_row_first; i < from_row_last; i++)
    120   {
    121     assert(i < from_rows);
    122     assert(i - from_row_first + to_row_first < to_rows);
    123     for (j = from_col_first; j < from_col_last; j++)
    124     {
    125       assert(j < from_cols);
    126       assert(j - from_col_first + to_col_first < to_cols);
    127       to[(i - from_row_first + to_col_first) * to_cols
    128          + (j - from_col_first + to_col_first)]
    129         = from[i * from_cols + j];
    130     }
    131   }
    132 }
    133 
    134 /** Compute the matrix product of a1 and a2. */
    135 static elem_t* multiply_matrices(const elem_t* const a1,
    136                                  const int rows1,
    137                                  const int cols1,
    138                                  const elem_t* const a2,
    139                                  const int rows2,
    140                                  const int cols2)
    141 {
    142   int i, j, k;
    143   elem_t* prod;
    144 
    145   assert(cols1 == rows2);
    146 
    147   prod = new_matrix(rows1, cols2);
    148   for (i = 0; i < rows1; i++)
    149   {
    150     for (j = 0; j < cols2; j++)
    151     {
    152       prod[i * cols2 + j] = 0;
    153       for (k = 0; k < cols1; k++)
    154       {
    155         prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j];
    156       }
    157     }
    158   }
    159   return prod;
    160 }
    161 
    162 /** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting
    163  *  at row r0 and up to but not including row r1. It is assumed that as many
    164  *  threads execute this function concurrently as the count barrier p->b was
    165  *  initialized with. If the matrix p->a is nonsingular, and if matrix p->a
    166  *  has at least as many columns as rows, the result of this algorithm is that
    167  *  submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix.
    168  * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination
    169  */
    170 static void gj_threadfunc(struct gj_threadinfo* p)
    171 {
    172   int i, j, k;
    173   elem_t* const a = p->a;
    174   const int rows = p->rows;
    175   const int cols = p->cols;
    176 
    177   for (i = 0; i < p->rows; i++)
    178   {
    179     if (pthread_barrier_wait(p->b) == PTHREAD_BARRIER_SERIAL_THREAD)
    180     {
    181       // Pivoting.
    182       j = i;
    183       for (k = i + 1; k < rows; k++)
    184       {
    185         if (a[k * cols + i] > a[j * cols + i])
    186         {
    187           j = k;
    188         }
    189       }
    190       if (j != i)
    191       {
    192         for (k = 0; k < cols; k++)
    193         {
    194           const elem_t t = a[i * cols + k];
    195           a[i * cols + k] = a[j * cols + k];
    196           a[j * cols + k] = t;
    197         }
    198       }
    199       // Normalize row i.
    200       if (a[i * cols + i] != 0)
    201       {
    202         for (k = cols - 1; k >= 0; k--)
    203         {
    204           a[i * cols + k] /= a[i * cols + i];
    205         }
    206       }
    207     }
    208     pthread_barrier_wait(p->b);
    209     // Reduce all rows j != i.
    210     for (j = p->r0; j < p->r1; j++)
    211     {
    212       if (i != j)
    213       {
    214         const elem_t factor = a[j * cols + i];
    215         for (k = 0; k < cols; k++)
    216         {
    217           a[j * cols + k] -= a[i * cols + k] * factor;
    218         }
    219       }
    220     }
    221   }
    222 }
    223 
    224 /** Multithreaded Gauss-Jordan algorithm. */
    225 static void gj(elem_t* const a, const int rows, const int cols)
    226 {
    227   int i;
    228   struct gj_threadinfo* t;
    229   pthread_barrier_t b;
    230   pthread_attr_t attr;
    231   int err;
    232 
    233   assert(rows <= cols);
    234 
    235   t = malloc(sizeof(struct gj_threadinfo) * s_nthread);
    236 
    237   pthread_barrier_init(&b, 0, s_nthread);
    238 
    239   pthread_attr_init(&attr);
    240   err = pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN + 4096);
    241   assert(err == 0);
    242 
    243   for (i = 0; i < s_nthread; i++)
    244   {
    245     t[i].b = &b;
    246     t[i].a = a;
    247     t[i].rows = rows;
    248     t[i].cols = cols;
    249     t[i].r0 = i * rows / s_nthread;
    250     t[i].r1 = (i+1) * rows / s_nthread;
    251     pthread_create(&t[i].tid, &attr, (void*(*)(void*))gj_threadfunc, &t[i]);
    252   }
    253 
    254   pthread_attr_destroy(&attr);
    255 
    256   for (i = 0; i < s_nthread; i++)
    257   {
    258     pthread_join(t[i].tid, 0);
    259   }
    260 
    261   pthread_barrier_destroy(&b);
    262 
    263   free(t);
    264 }
    265 
    266 /** Matrix inversion via the Gauss-Jordan algorithm. */
    267 static elem_t* invert_matrix(const elem_t* const a, const int n)
    268 {
    269   int i, j;
    270   elem_t* const inv = new_matrix(n, n);
    271   elem_t* const tmp = new_matrix(n, 2*n);
    272   copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n);
    273   for (i = 0; i < n; i++)
    274     for (j = 0; j < n; j++)
    275       tmp[i * 2 * n + n + j] = (i == j);
    276   gj(tmp, n, 2*n);
    277   copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n);
    278   delete_matrix(tmp);
    279   return inv;
    280 }
    281 
    282 /** Compute the average square error between the identity matrix and the
    283  * product of matrix a with its inverse matrix.
    284  */
    285 static double identity_error(const elem_t* const a, const int n)
    286 {
    287   int i, j;
    288   elem_t e = 0;
    289   for (i = 0; i < n; i++)
    290   {
    291     for (j = 0; j < n; j++)
    292     {
    293       const elem_t d = a[i * n + j] - (i == j);
    294       e += d * d;
    295     }
    296   }
    297   return sqrt(e / (n * n));
    298 }
    299 
    300 /** Compute epsilon for the numeric type elem_t. Epsilon is defined as the
    301  *  smallest number for which the sum of one and that number is different of
    302  *  one. It is assumed that the underlying representation of elem_t uses
    303  *  base two.
    304  */
    305 static elem_t epsilon()
    306 {
    307   elem_t eps;
    308   for (eps = 1; 1 + eps != 1; eps /= 2)
    309     ;
    310   return 2 * eps;
    311 }
    312 
    313 int main(int argc, char** argv)
    314 {
    315   int matrix_size;
    316   int silent = 0;
    317   int optchar;
    318   elem_t *a, *inv, *prod;
    319   elem_t eps;
    320   double error;
    321   double ratio;
    322 
    323   while ((optchar = getopt(argc, argv, "qt:")) != EOF)
    324   {
    325     switch (optchar)
    326     {
    327     case 'q': silent = 1; break;
    328     case 't': s_nthread = atoi(optarg); break;
    329     default:
    330       fprintf(stderr, "Error: unknown option '%c'.\n", optchar);
    331       return 1;
    332     }
    333   }
    334 
    335   if (optind + 1 != argc)
    336   {
    337     fprintf(stderr, "Error: wrong number of arguments.\n");
    338   }
    339   matrix_size = atoi(argv[optind]);
    340 
    341   /* Error checking. */
    342   assert(matrix_size >= 1);
    343   assert(s_nthread >= 1);
    344 
    345   eps = epsilon();
    346   a = new_matrix(matrix_size, matrix_size);
    347   init_matrix(a, matrix_size, matrix_size);
    348   inv = invert_matrix(a, matrix_size);
    349   prod = multiply_matrices(a, matrix_size, matrix_size,
    350                            inv, matrix_size, matrix_size);
    351   error = identity_error(prod, matrix_size);
    352   ratio = error / (eps * matrix_size);
    353   if (! silent)
    354   {
    355     printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n",
    356            error, eps, ratio);
    357   }
    358   if (isfinite(ratio) && ratio < 100)
    359     printf("Error within bounds.\n");
    360   else
    361     printf("Error out of bounds.\n");
    362   delete_matrix(prod);
    363   delete_matrix(inv);
    364   delete_matrix(a);
    365 
    366   return 0;
    367 }
    368