Home | History | Annotate | Download | only in examples
      1 /*
      2  * bwt.c for libdivsufsort
      3  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
      4  *
      5  * Permission is hereby granted, free of charge, to any person
      6  * obtaining a copy of this software and associated documentation
      7  * files (the "Software"), to deal in the Software without
      8  * restriction, including without limitation the rights to use,
      9  * copy, modify, merge, publish, distribute, sublicense, and/or sell
     10  * copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following
     12  * conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be
     15  * included in all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
     19  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
     21  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
     22  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     24  * OTHER DEALINGS IN THE SOFTWARE.
     25  */
     26 
     27 #if HAVE_CONFIG_H
     28 # include "config.h"
     29 #endif
     30 #include <stdio.h>
     31 #if HAVE_STRING_H
     32 # include <string.h>
     33 #endif
     34 #if HAVE_STDLIB_H
     35 # include <stdlib.h>
     36 #endif
     37 #if HAVE_MEMORY_H
     38 # include <memory.h>
     39 #endif
     40 #if HAVE_STDDEF_H
     41 # include <stddef.h>
     42 #endif
     43 #if HAVE_STRINGS_H
     44 # include <strings.h>
     45 #endif
     46 #if HAVE_SYS_TYPES_H
     47 # include <sys/types.h>
     48 #endif
     49 #if HAVE_IO_H && HAVE_FCNTL_H
     50 # include <io.h>
     51 # include <fcntl.h>
     52 #endif
     53 #include <time.h>
     54 #include <divsufsort.h>
     55 #include "lfs.h"
     56 
     57 
     58 static
     59 size_t
     60 write_int(FILE *fp, saidx_t n) {
     61   unsigned char c[4];
     62   c[0] = (unsigned char)((n >>  0) & 0xff), c[1] = (unsigned char)((n >>  8) & 0xff),
     63   c[2] = (unsigned char)((n >> 16) & 0xff), c[3] = (unsigned char)((n >> 24) & 0xff);
     64   return fwrite(c, sizeof(unsigned char), 4, fp);
     65 }
     66 
     67 static
     68 void
     69 print_help(const char *progname, int status) {
     70   fprintf(stderr,
     71           "bwt, a burrows-wheeler transform program, version %s.\n",
     72           divsufsort_version());
     73   fprintf(stderr, "usage: %s [-b num] INFILE OUTFILE\n", progname);
     74   fprintf(stderr, "  -b num    set block size to num MiB [1..512] (default: 32)\n\n");
     75   exit(status);
     76 }
     77 
     78 int
     79 main(int argc, const char *argv[]) {
     80   FILE *fp, *ofp;
     81   const char *fname, *ofname;
     82   sauchar_t *T;
     83   saidx_t *SA;
     84   LFS_OFF_T n;
     85   size_t m;
     86   saidx_t pidx;
     87   clock_t start,finish;
     88   saint_t i, blocksize = 32, needclose = 3;
     89 
     90   /* Check arguments. */
     91   if((argc == 1) ||
     92      (strcmp(argv[1], "-h") == 0) ||
     93      (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
     94   if((argc != 3) && (argc != 5)) { print_help(argv[0], EXIT_FAILURE); }
     95   i = 1;
     96   if(argc == 5) {
     97     if(strcmp(argv[i], "-b") != 0) { print_help(argv[0], EXIT_FAILURE); }
     98     blocksize = atoi(argv[i + 1]);
     99     if(blocksize < 0) { blocksize = 1; }
    100     else if(512 < blocksize) { blocksize = 512; }
    101     i += 2;
    102   }
    103   blocksize <<= 20;
    104 
    105   /* Open a file for reading. */
    106   if(strcmp(argv[i], "-") != 0) {
    107 #if HAVE_FOPEN_S
    108     if(fopen_s(&fp, fname = argv[i], "rb") != 0) {
    109 #else
    110     if((fp = LFS_FOPEN(fname = argv[i], "rb")) == NULL) {
    111 #endif
    112       fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
    113       perror(NULL);
    114       exit(EXIT_FAILURE);
    115     }
    116   } else {
    117 #if HAVE__SETMODE && HAVE__FILENO
    118     if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
    119       fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
    120       perror(NULL);
    121       exit(EXIT_FAILURE);
    122     }
    123 #endif
    124     fp = stdin;
    125     fname = "stdin";
    126     needclose ^= 1;
    127   }
    128   i += 1;
    129 
    130   /* Open a file for writing. */
    131   if(strcmp(argv[i], "-") != 0) {
    132 #if HAVE_FOPEN_S
    133     if(fopen_s(&ofp, ofname = argv[i], "wb") != 0) {
    134 #else
    135     if((ofp = LFS_FOPEN(ofname = argv[i], "wb")) == NULL) {
    136 #endif
    137       fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
    138       perror(NULL);
    139       exit(EXIT_FAILURE);
    140     }
    141   } else {
    142 #if HAVE__SETMODE && HAVE__FILENO
    143     if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
    144       fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
    145       perror(NULL);
    146       exit(EXIT_FAILURE);
    147     }
    148 #endif
    149     ofp = stdout;
    150     ofname = "stdout";
    151     needclose ^= 2;
    152   }
    153 
    154   /* Get the file size. */
    155   if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
    156     n = LFS_FTELL(fp);
    157     rewind(fp);
    158     if(n < 0) {
    159       fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
    160       perror(NULL);
    161       exit(EXIT_FAILURE);
    162     }
    163     if(0x20000000L < n) { n = 0x20000000L; }
    164     if((blocksize == 0) || (n < blocksize)) { blocksize = (saidx_t)n; }
    165   } else if(blocksize == 0) { blocksize = 32 << 20; }
    166 
    167   /* Allocate 5blocksize bytes of memory. */
    168   T = (sauchar_t *)malloc(blocksize * sizeof(sauchar_t));
    169   SA = (saidx_t *)malloc(blocksize * sizeof(saidx_t));
    170   if((T == NULL) || (SA == NULL)) {
    171     fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    172     exit(EXIT_FAILURE);
    173   }
    174 
    175   /* Write the blocksize. */
    176   if(write_int(ofp, blocksize) != 4) {
    177     fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
    178     perror(NULL);
    179     exit(EXIT_FAILURE);
    180   }
    181 
    182   fprintf(stderr, "  BWT (blocksize %" PRIdSAINT_T ") ... ", blocksize);
    183   start = clock();
    184   for(n = 0; 0 < (m = fread(T, sizeof(sauchar_t), blocksize, fp)); n += m) {
    185     /* Burrows-Wheeler Transform. */
    186     pidx = divbwt(T, T, SA, m);
    187     if(pidx < 0) {
    188       fprintf(stderr, "%s (bw_transform): %s.\n",
    189         argv[0],
    190         (pidx == -1) ? "Invalid arguments" : "Cannot allocate memory");
    191       exit(EXIT_FAILURE);
    192     }
    193 
    194     /* Write the bwted data. */
    195     if((write_int(ofp, pidx) != 4) ||
    196        (fwrite(T, sizeof(sauchar_t), m, ofp) != m)) {
    197       fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
    198       perror(NULL);
    199       exit(EXIT_FAILURE);
    200     }
    201   }
    202   if(ferror(fp)) {
    203     fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
    204     perror(NULL);
    205     exit(EXIT_FAILURE);
    206   }
    207   finish = clock();
    208   fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n",
    209     n, (double)(finish - start) / (double)CLOCKS_PER_SEC);
    210 
    211   /* Close files */
    212   if(needclose & 1) { fclose(fp); }
    213   if(needclose & 2) { fclose(ofp); }
    214 
    215   /* Deallocate memory. */
    216   free(SA);
    217   free(T);
    218 
    219   return 0;
    220 }
    221