Home | History | Annotate | Download | only in genperf
      1 /*
      2  *
      3  * Generate Minimal Perfect Hash (genperf)
      4  *
      5  *  Copyright (C) 2006-2007  Peter Johnson
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
     17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
     20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 #include <stdio.h>
     29 #include <ctype.h>
     30 #include <stdarg.h>
     31 #include <string.h>
     32 #include "tools/genperf/perfect.h"
     33 #include "libyasm/compat-queue.h"
     34 #include "libyasm/coretype.h"
     35 #include "libyasm/errwarn.h"
     36 
     37 typedef STAILQ_HEAD(slist, sval) slist;
     38 typedef struct sval {
     39     STAILQ_ENTRY(sval) link;
     40     char *str;
     41 } sval;
     42 
     43 typedef STAILQ_HEAD(keyword_list, keyword) keyword_list;
     44 typedef struct keyword {
     45     STAILQ_ENTRY(keyword) link;
     46     char *name;
     47     char *args;
     48     unsigned int line;
     49 } keyword;
     50 
     51 static unsigned int cur_line = 1;
     52 static int errors = 0;
     53 
     54 static void
     55 report_error(const char *fmt, ...)
     56 {
     57     va_list ap;
     58 
     59     fprintf(stderr, "%u: ", cur_line);
     60     va_start(ap, fmt);
     61     vfprintf(stderr, fmt, ap);
     62     va_end(ap);
     63     fputc('\n', stderr);
     64     errors++;
     65 }
     66 
     67 void
     68 yasm__fatal(const char *message, ...)
     69 {
     70     abort();
     71 }
     72 
     73 /* make the c output for the perfect hash tab array */
     74 static void
     75 make_c_tab(
     76     FILE *f,
     77     bstuff *tab,        /* table indexed by b */
     78     ub4 smax,           /* range of scramble[] */
     79     ub4 blen,           /* b in 0..blen-1, power of 2 */
     80     ub4 *scramble)      /* used in final hash */
     81 {
     82     ub4   i;
     83     /* table for the mapping for the perfect hash */
     84     if (blen >= USE_SCRAMBLE) {
     85         /* A way to make the 1-byte values in tab bigger */
     86         if (smax > UB2MAXVAL+1) {
     87             fprintf(f, "  static const unsigned long scramble[] = {\n");
     88             for (i=0; i<=UB1MAXVAL; i+=4)
     89                 fprintf(f, "    0x%.8lx, 0x%.8lx, 0x%.8lx, 0x%.8lx,\n",
     90                     scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3]);
     91         } else {
     92             fprintf(f, "  static const unsigned short scramble[] = {\n");
     93             for (i=0; i<=UB1MAXVAL; i+=8)
     94                 fprintf(f,
     95 "    0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx,\n",
     96                     scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3],
     97                     scramble[i+4], scramble[i+5], scramble[i+6], scramble[i+7]);
     98         }
     99         fprintf(f, "  };\n");
    100         fprintf(f, "\n");
    101     }
    102 
    103     if (blen > 0) {
    104         /* small adjustments to _a_ to make values distinct */
    105         if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE)
    106             fprintf(f, "  static const unsigned char ");
    107         else
    108             fprintf(f, "  static const unsigned short ");
    109         fprintf(f, "tab[] = {\n");
    110 
    111         if (blen < 16) {
    112             for (i=0; i<blen; ++i)
    113                 fprintf(f, "%3ld,", scramble[tab[i].val_b]);
    114         } else if (blen <= 1024) {
    115             for (i=0; i<blen; i+=16)
    116                 fprintf(f, "    %ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
    117                     scramble[tab[i+0].val_b], scramble[tab[i+1].val_b],
    118                     scramble[tab[i+2].val_b], scramble[tab[i+3].val_b],
    119                     scramble[tab[i+4].val_b], scramble[tab[i+5].val_b],
    120                     scramble[tab[i+6].val_b], scramble[tab[i+7].val_b],
    121                     scramble[tab[i+8].val_b], scramble[tab[i+9].val_b],
    122                     scramble[tab[i+10].val_b], scramble[tab[i+11].val_b],
    123                     scramble[tab[i+12].val_b], scramble[tab[i+13].val_b],
    124                     scramble[tab[i+14].val_b], scramble[tab[i+15].val_b]);
    125         } else if (blen < USE_SCRAMBLE) {
    126             for (i=0; i<blen; i+=8)
    127                 fprintf(f, "    %ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
    128                     scramble[tab[i+0].val_b], scramble[tab[i+1].val_b],
    129                     scramble[tab[i+2].val_b], scramble[tab[i+3].val_b],
    130                     scramble[tab[i+4].val_b], scramble[tab[i+5].val_b],
    131                     scramble[tab[i+6].val_b], scramble[tab[i+7].val_b]);
    132         } else {
    133             for (i=0; i<blen; i+=16)
    134                 fprintf(f, "    %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,\n",
    135                     tab[i+0].val_b, tab[i+1].val_b,
    136                     tab[i+2].val_b, tab[i+3].val_b,
    137                     tab[i+4].val_b, tab[i+5].val_b,
    138                     tab[i+6].val_b, tab[i+7].val_b,
    139                     tab[i+8].val_b, tab[i+9].val_b,
    140                     tab[i+10].val_b, tab[i+11].val_b,
    141                     tab[i+12].val_b, tab[i+13].val_b,
    142                     tab[i+14].val_b, tab[i+15].val_b);
    143         }
    144         fprintf(f, "  };\n");
    145         fprintf(f, "\n");
    146     }
    147 }
    148 
    149 static void
    150 perfect_gen(FILE *out, const char *lookup_function_name,
    151             const char *struct_name, keyword_list *kws,
    152             const char *filename)
    153 {
    154     ub4 nkeys;
    155     key *keys;
    156     hashform form;
    157     bstuff *tab;                /* table indexed by b */
    158     hstuff *tabh;               /* table indexed by hash value */
    159     ub4 smax;           /* scramble[] values in 0..smax-1, a power of 2 */
    160     ub4 alen;                   /* a in 0..alen-1, a power of 2 */
    161     ub4 blen;                   /* b in 0..blen-1, a power of 2 */
    162     ub4 salt;                   /* a parameter to the hash function */
    163     gencode final;              /* code for final hash */
    164     ub4 i;
    165     ub4 scramble[SCRAMBLE_LEN]; /* used in final hash function */
    166     char buf[10][80];           /* buffer for generated code */
    167     char *buf2[10];             /* also for generated code */
    168     keyword *kw;
    169 
    170     /* perfect hash configuration */
    171     form.mode = NORMAL_HM;
    172     form.hashtype = STRING_HT;
    173     form.perfect = MINIMAL_HP;
    174     form.speed = SLOW_HS;
    175 
    176     /* set up code for final hash */
    177     final.line = buf2;
    178     final.used = 0;
    179     final.len  = 10;
    180     for (i=0; i<10; i++)
    181         final.line[i] = buf[i];
    182 
    183     /* build list of keys */
    184     nkeys = 0;
    185     keys = NULL;
    186     STAILQ_FOREACH(kw, kws, link) {
    187         key *k = yasm_xmalloc(sizeof(key));
    188 
    189         k->name_k = yasm__xstrdup(kw->name);
    190         k->len_k = (ub4)strlen(kw->name);
    191         k->next_k = keys;
    192         keys = k;
    193         nkeys++;
    194     }
    195 
    196     /* find the hash */
    197     findhash(&tab, &tabh, &alen, &blen, &salt, &final,
    198              scramble, &smax, keys, nkeys, &form);
    199 
    200     /* The hash function beginning */
    201     fprintf(out, "static const struct %s *\n", struct_name);
    202     fprintf(out, "%s(const char *key, size_t len)\n", lookup_function_name);
    203     fprintf(out, "{\n");
    204 
    205     /* output the dir table: this should loop up to smax for NORMAL_HP,
    206      * or up to pakd.nkeys for MINIMAL_HP.
    207      */
    208     fprintf(out, "  static const struct %s pd[%lu] = {\n", struct_name, nkeys);
    209     for (i=0; i<nkeys; i++) {
    210         if (tabh[i].key_h) {
    211             STAILQ_FOREACH(kw, kws, link) {
    212                 if (strcmp(kw->name, tabh[i].key_h->name_k) == 0)
    213                     break;
    214             }
    215             if (!kw) {
    216                 report_error("internal error: could not find `%s'",
    217                              tabh[i].key_h->name_k);
    218                 break;
    219             }
    220             fprintf(out, "#line %u \"%s\"\n", kw->line, filename);
    221             fprintf(out, "    {\"%s\"%s}", kw->name, kw->args);
    222         } else
    223             fprintf(out, "    { NULL }");
    224 
    225         if (i < nkeys-1)
    226             fprintf(out, ",");
    227         fprintf(out, "\n");
    228     }
    229     fprintf(out, "  };\n");
    230 
    231     /* output the hash tab[] array */
    232     make_c_tab(out, tab, smax, blen, scramble);
    233 
    234     /* The hash function body */
    235     fprintf(out, "  const struct %s *ret;\n", struct_name);
    236     for (i=0; i<final.used; ++i)
    237         fprintf(out, "%s", final.line[i]);
    238     fprintf(out, "  if (rsl >= %lu) return NULL;\n", nkeys);
    239     fprintf(out, "  ret = &pd[rsl];\n");
    240     fprintf(out, "  if (strcmp(key, ret->name) != 0) return NULL;\n");
    241     fprintf(out, "  return ret;\n");
    242     fprintf(out, "}\n");
    243     fprintf(out, "\n");
    244 
    245     free(tab);
    246     free(tabh);
    247 }
    248 
    249 int
    250 main(int argc, char *argv[])
    251 {
    252     FILE *in, *out;
    253     size_t i;
    254     char *ch;
    255     static char line[1024], tmp[1024];
    256     static char struct_name[128] = "";
    257     static char lookup_function_name[128] = "in_word_set";
    258     static char language[16] = "";
    259     static char delimiters[16] = ",\r\n";
    260     static char name[128];
    261     static char filename[768];
    262     int need_struct = 0;
    263     int have_struct = 0;
    264     int go_keywords = 0;
    265     int ignore_case = 0;
    266     int compare_strncmp = 0;
    267     int readonly_tables = 0;
    268     slist usercode, usercode2;
    269     keyword_list keywords;
    270     sval *sv;
    271     keyword *kw;
    272 
    273     if (argc != 3) {
    274         fprintf(stderr, "Usage: genperf <in> <out>\n");
    275         return EXIT_FAILURE;
    276     }
    277 
    278     in = fopen(argv[1], "rt");
    279     if (!in) {
    280         fprintf(stderr, "Could not open `%s' for reading\n", argv[1]);
    281         return EXIT_FAILURE;
    282     }
    283 
    284     ch = argv[1];
    285     i = 0;
    286     while (*ch && i < 767) {
    287         if (*ch == '\\') {
    288             filename[i++] = '/';
    289             ch++;
    290         } else
    291             filename[i++] = *ch++;
    292     }
    293     filename[i] = '\0';
    294 
    295     STAILQ_INIT(&usercode);
    296     STAILQ_INIT(&usercode2);
    297     STAILQ_INIT(&keywords);
    298 
    299     /* Parse declarations section */
    300     while (fgets(line, 1024, in)) {
    301         /* Comments start with # as the first thing on a line */
    302         if (line[0] == '#') {
    303             cur_line++;
    304             continue;
    305         }
    306 
    307         /* Handle structure declaration */
    308         if (strncmp(line, "struct", 6) == 0) {
    309             int braces;
    310 
    311             if (!need_struct) {
    312                 report_error("struct without %%struct-type declaration");
    313                 return EXIT_FAILURE;
    314             }
    315             if (have_struct) {
    316                 report_error("more than one struct declaration");
    317                 return EXIT_FAILURE;
    318             }
    319             have_struct = 1;
    320 
    321             /* copy struct name */
    322             ch = &line[6];
    323             while (isspace(*ch))
    324                 ch++;
    325             i = 0;
    326             while ((isalnum(*ch) || *ch == '_') && i < 127)
    327                 struct_name[i++] = *ch++;
    328             if (i == 127) {
    329                 report_error("struct name too long");
    330                 return EXIT_FAILURE;
    331             }
    332             struct_name[i] = '\0';
    333 
    334             sv = yasm_xmalloc(sizeof(sval));
    335             sprintf(tmp, "#line %u \"%s\"\n", cur_line, filename);
    336             sv->str = yasm__xstrdup(tmp);
    337             STAILQ_INSERT_TAIL(&usercode, sv, link);
    338 
    339             braces = 0;
    340             do {
    341                 /* count braces to determine when we're done */
    342                 ch = line;
    343                 while (*ch != '\0') {
    344                     if (*ch == '{')
    345                         braces++;
    346                     if (*ch == '}')
    347                         braces--;
    348                     ch++;
    349                 }
    350                 sv = yasm_xmalloc(sizeof(sval));
    351                 sv->str = yasm__xstrdup(line);
    352                 STAILQ_INSERT_TAIL(&usercode, sv, link);
    353                 cur_line++;
    354                 if (braces <= 0)
    355                     break;
    356             } while (fgets(line, 1024, in));
    357             cur_line++;
    358             continue;
    359         }
    360 
    361         /* Ignore non-declaration lines */
    362         if (line[0] != '%') {
    363             cur_line++;
    364             continue;
    365         }
    366 
    367         /* %% terminates declarations section */
    368         if (line[1] == '%') {
    369             if (need_struct && !have_struct) {
    370                 report_error("%%struct-type declaration, but no struct found");
    371                 return EXIT_FAILURE;
    372             }
    373             go_keywords = 1;
    374             break;      /* move on to keywords section */
    375         }
    376 
    377         /* %{ begins a verbatim code section that ends with %} */
    378         if (line[1] == '{') {
    379             sv = yasm_xmalloc(sizeof(sval));
    380             sprintf(tmp, "#line %u \"%s\"\n\n", cur_line, filename);
    381             sv->str = yasm__xstrdup(tmp);
    382             STAILQ_INSERT_TAIL(&usercode, sv, link);
    383 
    384             while (fgets(line, 1024, in)) {
    385                 cur_line++;
    386                 if (line[0] == '%' && line[1] == '}')
    387                     break;
    388                 sv = yasm_xmalloc(sizeof(sval));
    389                 sv->str = yasm__xstrdup(line);
    390                 STAILQ_INSERT_TAIL(&usercode, sv, link);
    391             }
    392             cur_line++;
    393             continue;
    394         }
    395 
    396         if (strncmp(&line[1], "ignore-case", 11) == 0) {
    397             ignore_case = 1;
    398         } else if (strncmp(&line[1], "compare-strncmp", 15) == 0) {
    399             compare_strncmp = 1;
    400         } else if (strncmp(&line[1], "readonly-tables", 15) == 0) {
    401             readonly_tables = 1;
    402         } else if (strncmp(&line[1], "language=", 9) == 0) {
    403             ch = &line[10];
    404             i = 0;
    405             while (*ch != '\n' && i<15)
    406                 language[i++] = *ch++;
    407             language[i] = '\0';
    408         } else if (strncmp(&line[1], "delimiters=", 11) == 0) {
    409             ch = &line[12];
    410             i = 0;
    411             while (i<15)
    412                 delimiters[i++] = *ch++;
    413             delimiters[i] = '\0';
    414         } else if (strncmp(&line[1], "enum", 4) == 0) {
    415             /* unused */
    416         } else if (strncmp(&line[1], "struct-type", 11) == 0) {
    417             need_struct = 1;
    418         } else if (strncmp(&line[1], "define", 6) == 0) {
    419             /* Several different defines we need to handle */
    420             ch = &line[7];
    421             while (isspace(*ch))
    422                 ch++;
    423 
    424             if (strncmp(ch, "hash-function-name", 18) == 0) {
    425                 /* unused */
    426             } else if (strncmp(ch, "lookup-function-name", 20) == 0) {
    427                 ch = &line[7+20+1];
    428                 while (isspace(*ch))
    429                     ch++;
    430                 i = 0;
    431                 while ((isalnum(*ch) || *ch == '_') && i < 127)
    432                     lookup_function_name[i++] = *ch++;
    433                 if (i == 127) {
    434                     report_error("struct name too long");
    435                     return EXIT_FAILURE;
    436                 }
    437                 lookup_function_name[i] = '\0';
    438             } else {
    439                 fprintf(stderr, "%u: unrecognized define `%s'\n", cur_line,
    440                         line);
    441             }
    442         } else {
    443             fprintf(stderr, "%u: unrecognized declaration `%s'\n", cur_line,
    444                     line);
    445         }
    446 
    447         cur_line++;
    448     }
    449 
    450     if (!go_keywords) {
    451         report_error("no keywords section found");
    452         return EXIT_FAILURE;
    453     }
    454 
    455     /* Parse keywords section */
    456     while (fgets(line, 1024, in)) {
    457         char *d;
    458 
    459         /* Comments start with # as the first thing on a line */
    460         if (line[0] == '#') {
    461             cur_line++;
    462             continue;
    463         }
    464 
    465         /* Keywords section terminated with %% */
    466         if (line[0] == '%' && line[1] == '%')
    467             break;
    468 
    469         /* Look for name */
    470         ch = &line[0];
    471         i = 0;
    472         while (strchr(delimiters, *ch) == NULL && i < 127)
    473             name[i++] = *ch++;
    474         if (i == 127) {
    475             report_error("keyword name too long");
    476             return EXIT_FAILURE;
    477         }
    478         name[i] = '\0';
    479 
    480         /* Strip EOL */
    481         d = strrchr(ch, '\n');
    482         if (d)
    483             *d = '\0';
    484         d = strrchr(ch, '\r');
    485         if (d)
    486             *d = '\0';
    487         kw = yasm_xmalloc(sizeof(keyword));
    488         kw->name = yasm__xstrdup(name);
    489         kw->args = yasm__xstrdup(ch);
    490         kw->line = cur_line;
    491         STAILQ_INSERT_TAIL(&keywords, kw, link);
    492         cur_line++;
    493     }
    494 
    495     if (errors > 0)
    496         return EXIT_FAILURE;
    497 
    498     /* Pull in any end code */
    499     if (!feof(in)) {
    500         sv = yasm_xmalloc(sizeof(sval));
    501         sprintf(tmp, "#line %u \"%s\"\n\n", cur_line, filename);
    502         sv->str = yasm__xstrdup(tmp);
    503         STAILQ_INSERT_TAIL(&usercode2, sv, link);
    504 
    505         while (fgets(line, 1024, in)) {
    506             sv = yasm_xmalloc(sizeof(sval));
    507             sv->str = yasm__xstrdup(line);
    508             STAILQ_INSERT_TAIL(&usercode2, sv, link);
    509         }
    510     }
    511 
    512     /* output code */
    513     out = fopen(argv[2], "wt");
    514     if (!out) {
    515         fprintf(stderr, "Could not open `%s' for writing\n", argv[2]);
    516         return EXIT_FAILURE;
    517     }
    518 
    519     fprintf(out, "/* %s code produced by genperf */\n", language);
    520     fprintf(out, "/* Command-line: genperf %s %s */\n", argv[1], argv[2]);
    521 
    522     STAILQ_FOREACH(sv, &usercode, link)
    523         fprintf(out, "%s", sv->str);
    524 
    525     /* Get perfect hash */
    526     perfect_gen(out, lookup_function_name, struct_name, &keywords, filename);
    527 
    528     STAILQ_FOREACH(sv, &usercode2, link)
    529         fprintf(out, "%s", sv->str);
    530 
    531     fclose(out);
    532 
    533     if (errors > 0) {
    534         remove(argv[2]);
    535         return EXIT_FAILURE;
    536     }
    537 
    538     return EXIT_SUCCESS;
    539 }
    540 
    541