Home | History | Annotate | Download | only in shootout
      1 /*
      2 Redistribution and use in source and binary forms, with or without
      3 modification, are permitted provided that the following conditions are met:
      4 
      5     * Redistributions of source code must retain the above copyright
      6     notice, this list of conditions and the following disclaimer.
      7 
      8     * Redistributions in binary form must reproduce the above copyright
      9     notice, this list of conditions and the following disclaimer in the
     10     documentation and/or other materials provided with the distribution.
     11 
     12     * Neither the name of "The Computer Language Benchmarks Game" nor the
     13     name of "The Computer Language Shootout Benchmarks" nor the names of
     14     its contributors may be used to endorse or promote products derived
     15     from this software without specific prior written permission.
     16 
     17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20 ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     27 POSSIBILITY OF SUCH DAMAGE.
     28 */
     29 
     30 #include <stdio.h>
     31 #include <string.h>
     32 #include <ctype.h>
     33 #include <stdlib.h>
     34 #include <glib.h>
     35 
     36 typedef struct stat_s stat_t;
     37 struct stat_s
     38 {
     39    const gchar *key;
     40    long stat;
     41 };
     42 
     43 #define MAX_ELM (8192 / sizeof (stat_t))
     44 
     45 static int
     46 generate_frequencies (int fl, char *buffer, long buflen,
     47 		      GHashTable *ht, GTrashStack **ts, GPtrArray *roots, GStringChunk *sc)
     48 {
     49    gchar *key;
     50    long i;
     51 
     52    if (fl > buflen) return 0;
     53    if (fl == 0) return 0;
     54 
     55    for (i = 0; i < buflen - fl + 1; ++i)
     56      {
     57 	char nulled;
     58 	stat_t *stat;
     59 
     60 	nulled = buffer[i + fl];
     61 	buffer[i + fl] = '\0';
     62 
     63 	key = g_string_chunk_insert_const(sc, buffer + i);
     64 
     65 	stat = g_hash_table_lookup(ht, key);
     66 	if (!stat)
     67 	  {
     68 	     stat = g_trash_stack_pop(ts);
     69 	     if (!stat)
     70 	       {
     71 		  int j;
     72 
     73 		  stat = malloc(sizeof (stat_t) * MAX_ELM);
     74 		  g_ptr_array_add(roots, stat);
     75 
     76 		  for (j = 1; j < MAX_ELM; ++j)
     77 		    g_trash_stack_push(ts, stat + j);
     78 	       }
     79 	     stat->stat = 1;
     80 	     stat->key = key;
     81 
     82 	     g_hash_table_insert(ht, key, stat);
     83 	  }
     84 	else
     85 	  stat->stat++;
     86 
     87 	buffer[i + fl] = nulled;
     88      }
     89 
     90    return buflen - fl + 1;
     91 }
     92 
     93 static int
     94 cmp_func(gconstpointer a, gconstpointer b)
     95 {
     96    const stat_t *left = a;
     97    const stat_t *right = b;
     98 
     99    return right->stat - left->stat;
    100 }
    101 
    102 static void
    103 sorted_list(gpointer key, gpointer value, gpointer user_data)
    104 {
    105    stat_t *data = value;
    106    GList **lst = user_data;
    107 
    108    *lst = g_list_insert_sorted(*lst, data, cmp_func);
    109 }
    110 
    111 static void
    112 display_stat(gpointer data, gpointer user_data)
    113 {
    114    long *total = user_data;
    115    stat_t *st = data;
    116 
    117    printf("%s %.3f\n", st->key, 100 * (float) st->stat / *total);
    118 }
    119 
    120 void
    121 write_frequencies (int fl, char *buffer, long buflen, GTrashStack **ts, GPtrArray *roots)
    122 {
    123    GStringChunk *sc;
    124    GHashTable *ht;
    125    GList *lst;
    126    long total;
    127 
    128    ht = g_hash_table_new_full(g_str_hash, g_str_equal, NULL /* free key */, NULL /* free value */);
    129    sc = g_string_chunk_new(buflen);
    130    lst = NULL;
    131 
    132    total = generate_frequencies (fl, buffer, buflen, ht, ts, roots, sc);
    133 
    134    if (!total) goto on_error;
    135 
    136    g_hash_table_foreach(ht, sorted_list, &lst);
    137    g_list_foreach(lst, display_stat, &total);
    138    g_list_free(lst);
    139 
    140  on_error:
    141    g_hash_table_destroy(ht);
    142    g_string_chunk_free(sc);
    143 }
    144 
    145 void
    146 write_count (char *searchFor, char *buffer, long buflen, GTrashStack **ts, GPtrArray *roots)
    147 {
    148    GStringChunk *sc;
    149    GHashTable *ht;
    150    stat_t *result;
    151    GList *lst;
    152    long total;
    153    long fl;
    154 
    155    fl = strlen(searchFor);
    156 
    157    ht = g_hash_table_new_full(g_str_hash, g_str_equal, NULL /* free key */, NULL /* free value */);
    158    sc = g_string_chunk_new(buflen);
    159    lst = NULL;
    160    result = NULL;
    161 
    162    total = generate_frequencies (fl, buffer, buflen, ht, ts, roots, sc);
    163 
    164    if (!total) goto on_error;
    165 
    166    result = g_hash_table_lookup(ht, searchFor);
    167 
    168  on_error:
    169    printf("%ld\t%s\n", result ? result->stat : 0, searchFor);
    170 
    171    g_hash_table_destroy(ht);
    172    g_string_chunk_free(sc);
    173 }
    174 
    175 int
    176 main ()
    177 {
    178    char buffer[4096];
    179    GTrashStack *ts;
    180    GPtrArray *roots;
    181    GString *stuff;
    182    gchar *s;
    183    int len;
    184 
    185    roots = g_ptr_array_new();
    186    ts = NULL;
    187 
    188    while (fgets(buffer, sizeof (buffer), stdin))
    189      if (strncmp(buffer, ">THREE", 6) == 0)
    190        break;
    191 
    192    stuff = g_string_new(NULL);
    193 
    194    while (fgets(buffer, sizeof (buffer), stdin))
    195      {
    196 	size_t sz;
    197 
    198 	if (buffer[0] == '>')
    199 	  break;
    200 
    201 	sz = strlen(buffer);
    202 	if (buffer[sz - 1] == '\n')
    203 	  --sz;
    204 
    205 	stuff = g_string_append_len(stuff, buffer, sz);
    206      }
    207 
    208    stuff = g_string_ascii_up(stuff);
    209    len = stuff->len;
    210    s = g_string_free(stuff, FALSE);
    211 
    212    write_frequencies(1, s, len, &ts, roots);
    213    printf("\n");
    214    write_frequencies(2, s, len, &ts, roots);
    215    printf("\n");
    216    write_count("GGT", s, len, &ts, roots);
    217    write_count("GGTA", s, len, &ts, roots);
    218    write_count("GGTATT", s, len, &ts, roots);
    219    write_count("GGTATTTTAATT", s, len, &ts, roots);
    220    write_count("GGTATTTTAATTTATAGT", s, len, &ts, roots);
    221 
    222    free(s);
    223 
    224    g_ptr_array_foreach(roots, (GFunc)free, NULL);
    225    g_ptr_array_free(roots, TRUE);
    226 
    227    return 0;
    228 }
    229