1 /* 2 * 3 * Generate Minimal Perfect Hash (genperf) 4 * 5 * Copyright (C) 2006-2007 Peter Johnson 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS'' 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 #include <stdio.h> 29 #include <ctype.h> 30 #include <stdarg.h> 31 #include <string.h> 32 #include "tools/genperf/perfect.h" 33 #include "libyasm/compat-queue.h" 34 #include "libyasm/coretype.h" 35 #include "libyasm/errwarn.h" 36 37 typedef STAILQ_HEAD(slist, sval) slist; 38 typedef struct sval { 39 STAILQ_ENTRY(sval) link; 40 char *str; 41 } sval; 42 43 typedef STAILQ_HEAD(keyword_list, keyword) keyword_list; 44 typedef struct keyword { 45 STAILQ_ENTRY(keyword) link; 46 char *name; 47 char *args; 48 unsigned int line; 49 } keyword; 50 51 static unsigned int cur_line = 1; 52 static int errors = 0; 53 54 static void 55 report_error(const char *fmt, ...) 56 { 57 va_list ap; 58 59 fprintf(stderr, "%u: ", cur_line); 60 va_start(ap, fmt); 61 vfprintf(stderr, fmt, ap); 62 va_end(ap); 63 fputc('\n', stderr); 64 errors++; 65 } 66 67 void 68 yasm__fatal(const char *message, ...) 69 { 70 abort(); 71 } 72 73 /* make the c output for the perfect hash tab array */ 74 static void 75 make_c_tab( 76 FILE *f, 77 bstuff *tab, /* table indexed by b */ 78 ub4 smax, /* range of scramble[] */ 79 ub4 blen, /* b in 0..blen-1, power of 2 */ 80 ub4 *scramble) /* used in final hash */ 81 { 82 ub4 i; 83 /* table for the mapping for the perfect hash */ 84 if (blen >= USE_SCRAMBLE) { 85 /* A way to make the 1-byte values in tab bigger */ 86 if (smax > UB2MAXVAL+1) { 87 fprintf(f, " static const unsigned long scramble[] = {\n"); 88 for (i=0; i<=UB1MAXVAL; i+=4) 89 fprintf(f, " 0x%.8lx, 0x%.8lx, 0x%.8lx, 0x%.8lx,\n", 90 scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3]); 91 } else { 92 fprintf(f, " static const unsigned short scramble[] = {\n"); 93 for (i=0; i<=UB1MAXVAL; i+=8) 94 fprintf(f, 95 " 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx,\n", 96 scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3], 97 scramble[i+4], scramble[i+5], scramble[i+6], scramble[i+7]); 98 } 99 fprintf(f, " };\n"); 100 fprintf(f, "\n"); 101 } 102 103 if (blen > 0) { 104 /* small adjustments to _a_ to make values distinct */ 105 if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE) 106 fprintf(f, " static const unsigned char "); 107 else 108 fprintf(f, " static const unsigned short "); 109 fprintf(f, "tab[] = {\n"); 110 111 if (blen < 16) { 112 for (i=0; i<blen; ++i) 113 fprintf(f, "%3ld,", scramble[tab[i].val_b]); 114 } else if (blen <= 1024) { 115 for (i=0; i<blen; i+=16) 116 fprintf(f, " %ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n", 117 scramble[tab[i+0].val_b], scramble[tab[i+1].val_b], 118 scramble[tab[i+2].val_b], scramble[tab[i+3].val_b], 119 scramble[tab[i+4].val_b], scramble[tab[i+5].val_b], 120 scramble[tab[i+6].val_b], scramble[tab[i+7].val_b], 121 scramble[tab[i+8].val_b], scramble[tab[i+9].val_b], 122 scramble[tab[i+10].val_b], scramble[tab[i+11].val_b], 123 scramble[tab[i+12].val_b], scramble[tab[i+13].val_b], 124 scramble[tab[i+14].val_b], scramble[tab[i+15].val_b]); 125 } else if (blen < USE_SCRAMBLE) { 126 for (i=0; i<blen; i+=8) 127 fprintf(f, " %ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n", 128 scramble[tab[i+0].val_b], scramble[tab[i+1].val_b], 129 scramble[tab[i+2].val_b], scramble[tab[i+3].val_b], 130 scramble[tab[i+4].val_b], scramble[tab[i+5].val_b], 131 scramble[tab[i+6].val_b], scramble[tab[i+7].val_b]); 132 } else { 133 for (i=0; i<blen; i+=16) 134 fprintf(f, " %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,\n", 135 tab[i+0].val_b, tab[i+1].val_b, 136 tab[i+2].val_b, tab[i+3].val_b, 137 tab[i+4].val_b, tab[i+5].val_b, 138 tab[i+6].val_b, tab[i+7].val_b, 139 tab[i+8].val_b, tab[i+9].val_b, 140 tab[i+10].val_b, tab[i+11].val_b, 141 tab[i+12].val_b, tab[i+13].val_b, 142 tab[i+14].val_b, tab[i+15].val_b); 143 } 144 fprintf(f, " };\n"); 145 fprintf(f, "\n"); 146 } 147 } 148 149 static void 150 perfect_gen(FILE *out, const char *lookup_function_name, 151 const char *struct_name, keyword_list *kws, 152 const char *filename) 153 { 154 ub4 nkeys; 155 key *keys; 156 hashform form; 157 bstuff *tab; /* table indexed by b */ 158 hstuff *tabh; /* table indexed by hash value */ 159 ub4 smax; /* scramble[] values in 0..smax-1, a power of 2 */ 160 ub4 alen; /* a in 0..alen-1, a power of 2 */ 161 ub4 blen; /* b in 0..blen-1, a power of 2 */ 162 ub4 salt; /* a parameter to the hash function */ 163 gencode final; /* code for final hash */ 164 ub4 i; 165 ub4 scramble[SCRAMBLE_LEN]; /* used in final hash function */ 166 char buf[10][80]; /* buffer for generated code */ 167 char *buf2[10]; /* also for generated code */ 168 keyword *kw; 169 170 /* perfect hash configuration */ 171 form.mode = NORMAL_HM; 172 form.hashtype = STRING_HT; 173 form.perfect = MINIMAL_HP; 174 form.speed = SLOW_HS; 175 176 /* set up code for final hash */ 177 final.line = buf2; 178 final.used = 0; 179 final.len = 10; 180 for (i=0; i<10; i++) 181 final.line[i] = buf[i]; 182 183 /* build list of keys */ 184 nkeys = 0; 185 keys = NULL; 186 STAILQ_FOREACH(kw, kws, link) { 187 key *k = yasm_xmalloc(sizeof(key)); 188 189 k->name_k = yasm__xstrdup(kw->name); 190 k->len_k = (ub4)strlen(kw->name); 191 k->next_k = keys; 192 keys = k; 193 nkeys++; 194 } 195 196 /* find the hash */ 197 findhash(&tab, &tabh, &alen, &blen, &salt, &final, 198 scramble, &smax, keys, nkeys, &form); 199 200 /* The hash function beginning */ 201 fprintf(out, "static const struct %s *\n", struct_name); 202 fprintf(out, "%s(const char *key, size_t len)\n", lookup_function_name); 203 fprintf(out, "{\n"); 204 205 /* output the dir table: this should loop up to smax for NORMAL_HP, 206 * or up to pakd.nkeys for MINIMAL_HP. 207 */ 208 fprintf(out, " static const struct %s pd[%lu] = {\n", struct_name, nkeys); 209 for (i=0; i<nkeys; i++) { 210 if (tabh[i].key_h) { 211 STAILQ_FOREACH(kw, kws, link) { 212 if (strcmp(kw->name, tabh[i].key_h->name_k) == 0) 213 break; 214 } 215 if (!kw) { 216 report_error("internal error: could not find `%s'", 217 tabh[i].key_h->name_k); 218 break; 219 } 220 fprintf(out, "#line %u \"%s\"\n", kw->line, filename); 221 fprintf(out, " {\"%s\"%s}", kw->name, kw->args); 222 } else 223 fprintf(out, " { NULL }"); 224 225 if (i < nkeys-1) 226 fprintf(out, ","); 227 fprintf(out, "\n"); 228 } 229 fprintf(out, " };\n"); 230 231 /* output the hash tab[] array */ 232 make_c_tab(out, tab, smax, blen, scramble); 233 234 /* The hash function body */ 235 fprintf(out, " const struct %s *ret;\n", struct_name); 236 for (i=0; i<final.used; ++i) 237 fprintf(out, "%s", final.line[i]); 238 fprintf(out, " if (rsl >= %lu) return NULL;\n", nkeys); 239 fprintf(out, " ret = &pd[rsl];\n"); 240 fprintf(out, " if (strcmp(key, ret->name) != 0) return NULL;\n"); 241 fprintf(out, " return ret;\n"); 242 fprintf(out, "}\n"); 243 fprintf(out, "\n"); 244 245 free(tab); 246 free(tabh); 247 } 248 249 int 250 main(int argc, char *argv[]) 251 { 252 FILE *in, *out; 253 size_t i; 254 char *ch; 255 static char line[1024], tmp[1024]; 256 static char struct_name[128] = ""; 257 static char lookup_function_name[128] = "in_word_set"; 258 static char language[16] = ""; 259 static char delimiters[16] = ",\r\n"; 260 static char name[128]; 261 static char filename[768]; 262 int need_struct = 0; 263 int have_struct = 0; 264 int go_keywords = 0; 265 int ignore_case = 0; 266 int compare_strncmp = 0; 267 int readonly_tables = 0; 268 slist usercode, usercode2; 269 keyword_list keywords; 270 sval *sv; 271 keyword *kw; 272 273 if (argc != 3) { 274 fprintf(stderr, "Usage: genperf <in> <out>\n"); 275 return EXIT_FAILURE; 276 } 277 278 in = fopen(argv[1], "rt"); 279 if (!in) { 280 fprintf(stderr, "Could not open `%s' for reading\n", argv[1]); 281 return EXIT_FAILURE; 282 } 283 284 ch = argv[1]; 285 i = 0; 286 while (*ch && i < 767) { 287 if (*ch == '\\') { 288 filename[i++] = '/'; 289 ch++; 290 } else 291 filename[i++] = *ch++; 292 } 293 filename[i] = '\0'; 294 295 STAILQ_INIT(&usercode); 296 STAILQ_INIT(&usercode2); 297 STAILQ_INIT(&keywords); 298 299 /* Parse declarations section */ 300 while (fgets(line, 1024, in)) { 301 /* Comments start with # as the first thing on a line */ 302 if (line[0] == '#') { 303 cur_line++; 304 continue; 305 } 306 307 /* Handle structure declaration */ 308 if (strncmp(line, "struct", 6) == 0) { 309 int braces; 310 311 if (!need_struct) { 312 report_error("struct without %%struct-type declaration"); 313 return EXIT_FAILURE; 314 } 315 if (have_struct) { 316 report_error("more than one struct declaration"); 317 return EXIT_FAILURE; 318 } 319 have_struct = 1; 320 321 /* copy struct name */ 322 ch = &line[6]; 323 while (isspace(*ch)) 324 ch++; 325 i = 0; 326 while ((isalnum(*ch) || *ch == '_') && i < 127) 327 struct_name[i++] = *ch++; 328 if (i == 127) { 329 report_error("struct name too long"); 330 return EXIT_FAILURE; 331 } 332 struct_name[i] = '\0'; 333 334 sv = yasm_xmalloc(sizeof(sval)); 335 sprintf(tmp, "#line %u \"%s\"\n", cur_line, filename); 336 sv->str = yasm__xstrdup(tmp); 337 STAILQ_INSERT_TAIL(&usercode, sv, link); 338 339 braces = 0; 340 do { 341 /* count braces to determine when we're done */ 342 ch = line; 343 while (*ch != '\0') { 344 if (*ch == '{') 345 braces++; 346 if (*ch == '}') 347 braces--; 348 ch++; 349 } 350 sv = yasm_xmalloc(sizeof(sval)); 351 sv->str = yasm__xstrdup(line); 352 STAILQ_INSERT_TAIL(&usercode, sv, link); 353 cur_line++; 354 if (braces <= 0) 355 break; 356 } while (fgets(line, 1024, in)); 357 cur_line++; 358 continue; 359 } 360 361 /* Ignore non-declaration lines */ 362 if (line[0] != '%') { 363 cur_line++; 364 continue; 365 } 366 367 /* %% terminates declarations section */ 368 if (line[1] == '%') { 369 if (need_struct && !have_struct) { 370 report_error("%%struct-type declaration, but no struct found"); 371 return EXIT_FAILURE; 372 } 373 go_keywords = 1; 374 break; /* move on to keywords section */ 375 } 376 377 /* %{ begins a verbatim code section that ends with %} */ 378 if (line[1] == '{') { 379 sv = yasm_xmalloc(sizeof(sval)); 380 sprintf(tmp, "#line %u \"%s\"\n\n", cur_line, filename); 381 sv->str = yasm__xstrdup(tmp); 382 STAILQ_INSERT_TAIL(&usercode, sv, link); 383 384 while (fgets(line, 1024, in)) { 385 cur_line++; 386 if (line[0] == '%' && line[1] == '}') 387 break; 388 sv = yasm_xmalloc(sizeof(sval)); 389 sv->str = yasm__xstrdup(line); 390 STAILQ_INSERT_TAIL(&usercode, sv, link); 391 } 392 cur_line++; 393 continue; 394 } 395 396 if (strncmp(&line[1], "ignore-case", 11) == 0) { 397 ignore_case = 1; 398 } else if (strncmp(&line[1], "compare-strncmp", 15) == 0) { 399 compare_strncmp = 1; 400 } else if (strncmp(&line[1], "readonly-tables", 15) == 0) { 401 readonly_tables = 1; 402 } else if (strncmp(&line[1], "language=", 9) == 0) { 403 ch = &line[10]; 404 i = 0; 405 while (*ch != '\n' && i<15) 406 language[i++] = *ch++; 407 language[i] = '\0'; 408 } else if (strncmp(&line[1], "delimiters=", 11) == 0) { 409 ch = &line[12]; 410 i = 0; 411 while (i<15) 412 delimiters[i++] = *ch++; 413 delimiters[i] = '\0'; 414 } else if (strncmp(&line[1], "enum", 4) == 0) { 415 /* unused */ 416 } else if (strncmp(&line[1], "struct-type", 11) == 0) { 417 need_struct = 1; 418 } else if (strncmp(&line[1], "define", 6) == 0) { 419 /* Several different defines we need to handle */ 420 ch = &line[7]; 421 while (isspace(*ch)) 422 ch++; 423 424 if (strncmp(ch, "hash-function-name", 18) == 0) { 425 /* unused */ 426 } else if (strncmp(ch, "lookup-function-name", 20) == 0) { 427 ch = &line[7+20+1]; 428 while (isspace(*ch)) 429 ch++; 430 i = 0; 431 while ((isalnum(*ch) || *ch == '_') && i < 127) 432 lookup_function_name[i++] = *ch++; 433 if (i == 127) { 434 report_error("struct name too long"); 435 return EXIT_FAILURE; 436 } 437 lookup_function_name[i] = '\0'; 438 } else { 439 fprintf(stderr, "%u: unrecognized define `%s'\n", cur_line, 440 line); 441 } 442 } else { 443 fprintf(stderr, "%u: unrecognized declaration `%s'\n", cur_line, 444 line); 445 } 446 447 cur_line++; 448 } 449 450 if (!go_keywords) { 451 report_error("no keywords section found"); 452 return EXIT_FAILURE; 453 } 454 455 /* Parse keywords section */ 456 while (fgets(line, 1024, in)) { 457 char *d; 458 459 /* Comments start with # as the first thing on a line */ 460 if (line[0] == '#') { 461 cur_line++; 462 continue; 463 } 464 465 /* Keywords section terminated with %% */ 466 if (line[0] == '%' && line[1] == '%') 467 break; 468 469 /* Look for name */ 470 ch = &line[0]; 471 i = 0; 472 while (strchr(delimiters, *ch) == NULL && i < 127) 473 name[i++] = *ch++; 474 if (i == 127) { 475 report_error("keyword name too long"); 476 return EXIT_FAILURE; 477 } 478 name[i] = '\0'; 479 480 /* Strip EOL */ 481 d = strrchr(ch, '\n'); 482 if (d) 483 *d = '\0'; 484 d = strrchr(ch, '\r'); 485 if (d) 486 *d = '\0'; 487 kw = yasm_xmalloc(sizeof(keyword)); 488 kw->name = yasm__xstrdup(name); 489 kw->args = yasm__xstrdup(ch); 490 kw->line = cur_line; 491 STAILQ_INSERT_TAIL(&keywords, kw, link); 492 cur_line++; 493 } 494 495 if (errors > 0) 496 return EXIT_FAILURE; 497 498 /* Pull in any end code */ 499 if (!feof(in)) { 500 sv = yasm_xmalloc(sizeof(sval)); 501 sprintf(tmp, "#line %u \"%s\"\n\n", cur_line, filename); 502 sv->str = yasm__xstrdup(tmp); 503 STAILQ_INSERT_TAIL(&usercode2, sv, link); 504 505 while (fgets(line, 1024, in)) { 506 sv = yasm_xmalloc(sizeof(sval)); 507 sv->str = yasm__xstrdup(line); 508 STAILQ_INSERT_TAIL(&usercode2, sv, link); 509 } 510 } 511 512 /* output code */ 513 out = fopen(argv[2], "wt"); 514 if (!out) { 515 fprintf(stderr, "Could not open `%s' for writing\n", argv[2]); 516 return EXIT_FAILURE; 517 } 518 519 fprintf(out, "/* %s code produced by genperf */\n", language); 520 fprintf(out, "/* Command-line: genperf %s %s */\n", argv[1], argv[2]); 521 522 STAILQ_FOREACH(sv, &usercode, link) 523 fprintf(out, "%s", sv->str); 524 525 /* Get perfect hash */ 526 perfect_gen(out, lookup_function_name, struct_name, &keywords, filename); 527 528 STAILQ_FOREACH(sv, &usercode2, link) 529 fprintf(out, "%s", sv->str); 530 531 fclose(out); 532 533 if (errors > 0) { 534 remove(argv[2]); 535 return EXIT_FAILURE; 536 } 537 538 return EXIT_SUCCESS; 539 } 540 541