Home | History | Annotate | Download | only in tests
      1 /** Compute the matrix inverse via Gauss-Jordan elimination.
      2  *  This program uses OpenMP separate computation steps but no
      3  *  mutexes. It is an example of a race-free program on which no data races
      4  *  are reported by the happens-before algorithm (drd), but a lot of data races
      5  *  (all false positives) are reported by the Eraser-algorithm (helgrind).
      6  */
      7 
      8 
      9 #define _GNU_SOURCE
     10 
     11 /***********************/
     12 /* Include directives. */
     13 /***********************/
     14 
     15 #include <assert.h>
     16 #include <math.h>
     17 #include <omp.h>
     18 #include <stdio.h>
     19 #include <stdlib.h>
     20 #include <unistd.h>  // getopt()
     21 
     22 
     23 /*********************/
     24 /* Type definitions. */
     25 /*********************/
     26 
     27 typedef double elem_t;
     28 
     29 
     30 /********************/
     31 /* Local variables. */
     32 /********************/
     33 
     34 static int s_trigger_race;
     35 
     36 
     37 /*************************/
     38 /* Function definitions. */
     39 /*************************/
     40 
     41 /** Allocate memory for a matrix with the specified number of rows and
     42  *  columns.
     43  */
     44 static elem_t* new_matrix(const int rows, const int cols)
     45 {
     46   assert(rows > 0);
     47   assert(cols > 0);
     48   return malloc(rows * cols * sizeof(elem_t));
     49 }
     50 
     51 /** Free the memory that was allocated for a matrix. */
     52 static void delete_matrix(elem_t* const a)
     53 {
     54   free(a);
     55 }
     56 
     57 /** Fill in some numbers in a matrix. */
     58 static void init_matrix(elem_t* const a, const int rows, const int cols)
     59 {
     60   int i, j;
     61   for (i = 0; i < rows; i++)
     62   {
     63     for (j = 0; j < rows; j++)
     64     {
     65       a[i * cols + j] = 1.0 / (1 + abs(i-j));
     66     }
     67   }
     68 }
     69 
     70 /** Print all elements of a matrix. */
     71 void print_matrix(const char* const label,
     72                   const elem_t* const a, const int rows, const int cols)
     73 {
     74   int i, j;
     75   printf("%s:\n", label);
     76   for (i = 0; i < rows; i++)
     77   {
     78     for (j = 0; j < cols; j++)
     79     {
     80       printf("%g ", a[i * cols + j]);
     81     }
     82     printf("\n");
     83   }
     84 }
     85 
     86 /** Copy a subset of the elements of a matrix into another matrix. */
     87 static void copy_matrix(const elem_t* const from,
     88                         const int from_rows,
     89                         const int from_cols,
     90                         const int from_row_first,
     91                         const int from_row_last,
     92                         const int from_col_first,
     93                         const int from_col_last,
     94                         elem_t* const to,
     95                         const int to_rows,
     96                         const int to_cols,
     97                         const int to_row_first,
     98                         const int to_row_last,
     99                         const int to_col_first,
    100                         const int to_col_last)
    101 {
    102   int i, j;
    103 
    104   assert(from_row_last - from_row_first == to_row_last - to_row_first);
    105   assert(from_col_last - from_col_first == to_col_last - to_col_first);
    106 
    107   for (i = from_row_first; i < from_row_last; i++)
    108   {
    109     assert(i < from_rows);
    110     assert(i - from_row_first + to_row_first < to_rows);
    111     for (j = from_col_first; j < from_col_last; j++)
    112     {
    113       assert(j < from_cols);
    114       assert(j - from_col_first + to_col_first < to_cols);
    115       to[(i - from_row_first + to_col_first) * to_cols
    116          + (j - from_col_first + to_col_first)]
    117         = from[i * from_cols + j];
    118     }
    119   }
    120 }
    121 
    122 /** Compute the matrix product of a1 and a2. */
    123 static elem_t* multiply_matrices(const elem_t* const a1,
    124                                  const int rows1,
    125                                  const int cols1,
    126                                  const elem_t* const a2,
    127                                  const int rows2,
    128                                  const int cols2)
    129 {
    130   int i, j, k;
    131   elem_t* prod;
    132 
    133   assert(cols1 == rows2);
    134 
    135   prod = new_matrix(rows1, cols2);
    136   for (i = 0; i < rows1; i++)
    137   {
    138     for (j = 0; j < cols2; j++)
    139     {
    140       prod[i * cols2 + j] = 0;
    141       for (k = 0; k < cols1; k++)
    142       {
    143         prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j];
    144       }
    145     }
    146   }
    147   return prod;
    148 }
    149 
    150 /** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting
    151  *  at row r0 and up to but not including row r1. It is assumed that as many
    152  *  threads execute this function concurrently as the count barrier p->b was
    153  *  initialized with. If the matrix p->a is nonsingular, and if matrix p->a
    154  *  has at least as many columns as rows, the result of this algorithm is that
    155  *  submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix.
    156  * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination
    157  */
    158 static void gj(elem_t* const a, const int rows, const int cols)
    159 {
    160   int i, j, k;
    161 
    162   for (i = 0; i < rows; i++)
    163   {
    164     {
    165       // Pivoting.
    166       j = i;
    167       for (k = i + 1; k < rows; k++)
    168       {
    169         if (a[k * cols + i] > a[j * cols + i])
    170         {
    171           j = k;
    172         }
    173       }
    174       if (j != i)
    175       {
    176         for (k = 0; k < cols; k++)
    177         {
    178           const elem_t t = a[i * cols + k];
    179           a[i * cols + k] = a[j * cols + k];
    180           a[j * cols + k] = t;
    181         }
    182       }
    183       // Normalize row i.
    184       if (a[i * cols + i] != 0)
    185       {
    186         for (k = cols - 1; k >= 0; k--)
    187         {
    188           a[i * cols + k] /= a[i * cols + i];
    189         }
    190       }
    191     }
    192 
    193     // Reduce all rows j != i.
    194 
    195     if (s_trigger_race)
    196     {
    197 #     pragma omp parallel for private(j)
    198       for (j = 0; j < rows; j++)
    199       {
    200         if (i != j)
    201         {
    202           const elem_t factor = a[j * cols + i];
    203           for (k = 0; k < cols; k++)
    204           {
    205             a[j * cols + k] -= a[i * cols + k] * factor;
    206           }
    207         }
    208       }
    209     }
    210     else
    211     {
    212 #     pragma omp parallel for private(j, k)
    213       for (j = 0; j < rows; j++)
    214       {
    215         if (i != j)
    216         {
    217           const elem_t factor = a[j * cols + i];
    218           for (k = 0; k < cols; k++)
    219           {
    220             a[j * cols + k] -= a[i * cols + k] * factor;
    221           }
    222         }
    223       }
    224     }
    225   }
    226 }
    227 
    228 /** Matrix inversion via the Gauss-Jordan algorithm. */
    229 static elem_t* invert_matrix(const elem_t* const a, const int n)
    230 {
    231   int i, j;
    232   elem_t* const inv = new_matrix(n, n);
    233   elem_t* const tmp = new_matrix(n, 2*n);
    234   copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n);
    235   for (i = 0; i < n; i++)
    236     for (j = 0; j < n; j++)
    237       tmp[i * 2 * n + n + j] = (i == j);
    238   gj(tmp, n, 2*n);
    239   copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n);
    240   delete_matrix(tmp);
    241   return inv;
    242 }
    243 
    244 /** Compute the average square error between the identity matrix and the
    245  * product of matrix a with its inverse matrix.
    246  */
    247 static double identity_error(const elem_t* const a, const int n)
    248 {
    249   int i, j;
    250   elem_t e = 0;
    251   for (i = 0; i < n; i++)
    252   {
    253     for (j = 0; j < n; j++)
    254     {
    255       const elem_t d = a[i * n + j] - (i == j);
    256       e += d * d;
    257     }
    258   }
    259   return sqrt(e / (n * n));
    260 }
    261 
    262 /** Compute epsilon for the numeric type elem_t. Epsilon is defined as the
    263  *  smallest number for which the sum of one and that number is different of
    264  *  one. It is assumed that the underlying representation of elem_t uses
    265  *  base two.
    266  */
    267 static elem_t epsilon()
    268 {
    269   elem_t eps;
    270   for (eps = 1; 1 + eps != 1; eps /= 2)
    271     ;
    272   return 2 * eps;
    273 }
    274 
    275 static void usage(const char* const exe)
    276 {
    277   printf("Usage: %s [-h] [-q] [-r] [-t<n>] <m>\n"
    278          "-h: display this information.\n"
    279          "-q: quiet mode -- do not print computed error.\n"
    280          "-r: trigger a race condition.\n"
    281          "-t<n>: use <n> threads.\n"
    282          "<m>: matrix size.\n",
    283          exe);
    284 }
    285 
    286 int main(int argc, char** argv)
    287 {
    288   int matrix_size;
    289   int nthread = 1;
    290   int silent = 0;
    291   int optchar;
    292   elem_t *a, *inv, *prod;
    293   elem_t eps;
    294   double error;
    295   double ratio;
    296 
    297   while ((optchar = getopt(argc, argv, "hqrt:")) != EOF)
    298   {
    299     switch (optchar)
    300     {
    301     case 'h': usage(argv[0]); return 1;
    302     case 'q': silent = 1; break;
    303     case 'r': s_trigger_race = 1; break;
    304     case 't': nthread = atoi(optarg); break;
    305     default:
    306       return 1;
    307     }
    308   }
    309 
    310   if (optind + 1 != argc)
    311   {
    312     fprintf(stderr, "Error: wrong number of arguments.\n");
    313     return 1;
    314   }
    315   matrix_size = atoi(argv[optind]);
    316 
    317   /* Error checking. */
    318   assert(matrix_size >= 1);
    319   assert(nthread >= 1);
    320 
    321   omp_set_num_threads(nthread);
    322   omp_set_dynamic(0);
    323 
    324   eps = epsilon();
    325   a = new_matrix(matrix_size, matrix_size);
    326   init_matrix(a, matrix_size, matrix_size);
    327   inv = invert_matrix(a, matrix_size);
    328   prod = multiply_matrices(a, matrix_size, matrix_size,
    329                            inv, matrix_size, matrix_size);
    330   error = identity_error(prod, matrix_size);
    331   ratio = error / (eps * matrix_size);
    332   if (! silent)
    333   {
    334     printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n",
    335            error, eps, ratio);
    336   }
    337   if (isfinite(ratio) && ratio < 100)
    338     printf("Error within bounds.\n");
    339   else
    340     printf("Error out of bounds.\n");
    341   delete_matrix(prod);
    342   delete_matrix(inv);
    343   delete_matrix(a);
    344 
    345   return 0;
    346 }
    347