1 /* 2 * Experimental data distribution table generator 3 * Taken from the uncopyrighted NISTnet code (public domain). 4 * 5 * Read in a series of "random" data values, either 6 * experimentally or generated from some probability distribution. 7 * From this, create the inverse distribution table used to approximate 8 * the distribution. 9 */ 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <math.h> 13 #include <malloc.h> 14 #include <string.h> 15 #include <sys/types.h> 16 #include <sys/stat.h> 17 18 19 double * 20 readdoubles(FILE *fp, int *number) 21 { 22 struct stat info; 23 double *x; 24 int limit; 25 int n=0, i; 26 27 if (!fstat(fileno(fp), &info) && 28 info.st_size > 0) { 29 limit = 2*info.st_size/sizeof(double); /* @@ approximate */ 30 } else { 31 limit = 10000; 32 } 33 34 x = calloc(limit, sizeof(double)); 35 if (!x) { 36 perror("double alloc"); 37 exit(3); 38 } 39 40 for (i=0; i<limit; ++i){ 41 if (fscanf(fp, "%lf", &x[i]) != 1 || 42 feof(fp)) 43 break; 44 ++n; 45 } 46 *number = n; 47 return x; 48 } 49 50 void 51 arraystats(double *x, int limit, double *mu, double *sigma, double *rho) 52 { 53 int n=0, i; 54 double sumsquare=0.0, sum=0.0, top=0.0; 55 double sigma2=0.0; 56 57 for (i=0; i<limit; ++i){ 58 sumsquare += x[i]*x[i]; 59 sum += x[i]; 60 ++n; 61 } 62 *mu = sum/(double)n; 63 *sigma = sqrt((sumsquare - (double)n*(*mu)*(*mu))/(double)(n-1)); 64 65 for (i=1; i < n; ++i){ 66 top += ((double)x[i]- *mu)*((double)x[i-1]- *mu); 67 sigma2 += ((double)x[i-1] - *mu)*((double)x[i-1] - *mu); 68 69 } 70 *rho = top/sigma2; 71 } 72 73 /* Create a (normalized) distribution table from a set of observed 74 * values. The table is fixed to run from (as it happens) -4 to +4, 75 * with granularity .00002. 76 */ 77 78 #define TABLESIZE 16384/4 79 #define TABLEFACTOR 8192 80 #ifndef MINSHORT 81 #define MINSHORT -32768 82 #define MAXSHORT 32767 83 #endif 84 85 /* Since entries in the inverse are scaled by TABLEFACTOR, and can't be bigger 86 * than MAXSHORT, we don't bother looking at a larger domain than this: 87 */ 88 #define DISTTABLEDOMAIN ((MAXSHORT/TABLEFACTOR)+1) 89 #define DISTTABLEGRANULARITY 50000 90 #define DISTTABLESIZE (DISTTABLEDOMAIN*DISTTABLEGRANULARITY*2) 91 92 static int * 93 makedist(double *x, int limit, double mu, double sigma) 94 { 95 int *table; 96 int i, index, first=DISTTABLESIZE, last=0; 97 double input; 98 99 table = calloc(DISTTABLESIZE, sizeof(int)); 100 if (!table) { 101 perror("table alloc"); 102 exit(3); 103 } 104 105 for (i=0; i < limit; ++i) { 106 /* Normalize value */ 107 input = (x[i]-mu)/sigma; 108 109 index = (int)rint((input+DISTTABLEDOMAIN)*DISTTABLEGRANULARITY); 110 if (index < 0) index = 0; 111 if (index >= DISTTABLESIZE) index = DISTTABLESIZE-1; 112 ++table[index]; 113 if (index > last) 114 last = index +1; 115 if (index < first) 116 first = index; 117 } 118 return table; 119 } 120 121 /* replace an array by its cumulative distribution */ 122 static void 123 cumulativedist(int *table, int limit, int *total) 124 { 125 int accum=0; 126 127 while (--limit >= 0) { 128 accum += *table; 129 *table++ = accum; 130 } 131 *total = accum; 132 } 133 134 static short * 135 inverttable(int *table, int inversesize, int tablesize, int cumulative) 136 { 137 int i, inverseindex, inversevalue; 138 short *inverse; 139 double findex, fvalue; 140 141 inverse = (short *)malloc(inversesize*sizeof(short)); 142 for (i=0; i < inversesize; ++i) { 143 inverse[i] = MINSHORT; 144 } 145 for (i=0; i < tablesize; ++i) { 146 findex = ((double)i/(double)DISTTABLEGRANULARITY) - DISTTABLEDOMAIN; 147 fvalue = (double)table[i]/(double)cumulative; 148 inverseindex = (int)rint(fvalue*inversesize); 149 inversevalue = (int)rint(findex*TABLEFACTOR); 150 if (inversevalue <= MINSHORT) inversevalue = MINSHORT+1; 151 if (inversevalue > MAXSHORT) inversevalue = MAXSHORT; 152 if (inverseindex >= inversesize) inverseindex = inversesize- 1; 153 154 inverse[inverseindex] = inversevalue; 155 } 156 return inverse; 157 158 } 159 160 /* Run simple linear interpolation over the table to fill in missing entries */ 161 static void 162 interpolatetable(short *table, int limit) 163 { 164 int i, j, last, lasti = -1; 165 166 last = MINSHORT; 167 for (i=0; i < limit; ++i) { 168 if (table[i] == MINSHORT) { 169 for (j=i; j < limit; ++j) 170 if (table[j] != MINSHORT) 171 break; 172 if (j < limit) { 173 table[i] = last + (i-lasti)*(table[j]-last)/(j-lasti); 174 } else { 175 table[i] = last + (i-lasti)*(MAXSHORT-last)/(limit-lasti); 176 } 177 } else { 178 last = table[i]; 179 lasti = i; 180 } 181 } 182 } 183 184 static void 185 printtable(const short *table, int limit) 186 { 187 int i; 188 189 printf("# This is the distribution table for the experimental distribution.\n"); 190 191 for (i=0 ; i < limit; ++i) { 192 printf("%d%c", table[i], 193 (i % 8) == 7 ? '\n' : ' '); 194 } 195 } 196 197 int 198 main(int argc, char **argv) 199 { 200 FILE *fp; 201 double *x; 202 double mu, sigma, rho; 203 int limit; 204 int *table; 205 short *inverse; 206 int total; 207 208 if (argc > 1) { 209 if (!(fp = fopen(argv[1], "r"))) { 210 perror(argv[1]); 211 exit(1); 212 } 213 } else { 214 fp = stdin; 215 } 216 x = readdoubles(fp, &limit); 217 if (limit <= 0) { 218 fprintf(stderr, "Nothing much read!\n"); 219 exit(2); 220 } 221 arraystats(x, limit, &mu, &sigma, &rho); 222 #ifdef DEBUG 223 fprintf(stderr, "%d values, mu %10.4f, sigma %10.4f, rho %10.4f\n", 224 limit, mu, sigma, rho); 225 #endif 226 227 table = makedist(x, limit, mu, sigma); 228 free((void *) x); 229 cumulativedist(table, DISTTABLESIZE, &total); 230 inverse = inverttable(table, TABLESIZE, DISTTABLESIZE, total); 231 interpolatetable(inverse, TABLESIZE); 232 printtable(inverse, TABLESIZE); 233 return 0; 234 } 235