1 /* 2 * mksary.c for libdivsufsort 3 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person 6 * obtaining a copy of this software and associated documentation 7 * files (the "Software"), to deal in the Software without 8 * restriction, including without limitation the rights to use, 9 * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following 12 * conditions: 13 * 14 * The above copyright notice and this permission notice shall be 15 * included in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 * OTHER DEALINGS IN THE SOFTWARE. 25 */ 26 27 #if HAVE_CONFIG_H 28 # include "config.h" 29 #endif 30 #include <stdio.h> 31 #if HAVE_STRING_H 32 # include <string.h> 33 #endif 34 #if HAVE_STDLIB_H 35 # include <stdlib.h> 36 #endif 37 #if HAVE_MEMORY_H 38 # include <memory.h> 39 #endif 40 #if HAVE_STDDEF_H 41 # include <stddef.h> 42 #endif 43 #if HAVE_STRINGS_H 44 # include <strings.h> 45 #endif 46 #if HAVE_SYS_TYPES_H 47 # include <sys/types.h> 48 #endif 49 #if HAVE_IO_H && HAVE_FCNTL_H 50 # include <io.h> 51 # include <fcntl.h> 52 #endif 53 #include <time.h> 54 #include <divsufsort.h> 55 #include "lfs.h" 56 57 58 static 59 void 60 print_help(const char *progname, int status) { 61 fprintf(stderr, 62 "mksary, a simple suffix array builder, version %s.\n", 63 divsufsort_version()); 64 fprintf(stderr, "usage: %s INFILE OUTFILE\n\n", progname); 65 exit(status); 66 } 67 68 int 69 main(int argc, const char *argv[]) { 70 FILE *fp, *ofp; 71 const char *fname, *ofname; 72 sauchar_t *T; 73 saidx_t *SA; 74 LFS_OFF_T n; 75 clock_t start, finish; 76 saint_t needclose = 3; 77 78 /* Check arguments. */ 79 if((argc == 1) || 80 (strcmp(argv[1], "-h") == 0) || 81 (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } 82 if(argc != 3) { print_help(argv[0], EXIT_FAILURE); } 83 84 /* Open a file for reading. */ 85 if(strcmp(argv[1], "-") != 0) { 86 #if HAVE_FOPEN_S 87 if(fopen_s(&fp, fname = argv[1], "rb") != 0) { 88 #else 89 if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) { 90 #endif 91 fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname); 92 perror(NULL); 93 exit(EXIT_FAILURE); 94 } 95 } else { 96 #if HAVE__SETMODE && HAVE__FILENO 97 if(_setmode(_fileno(stdin), _O_BINARY) == -1) { 98 fprintf(stderr, "%s: Cannot set mode: ", argv[0]); 99 perror(NULL); 100 exit(EXIT_FAILURE); 101 } 102 #endif 103 fp = stdin; 104 fname = "stdin"; 105 needclose ^= 1; 106 } 107 108 /* Open a file for writing. */ 109 if(strcmp(argv[2], "-") != 0) { 110 #if HAVE_FOPEN_S 111 if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) { 112 #else 113 if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) { 114 #endif 115 fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname); 116 perror(NULL); 117 exit(EXIT_FAILURE); 118 } 119 } else { 120 #if HAVE__SETMODE && HAVE__FILENO 121 if(_setmode(_fileno(stdout), _O_BINARY) == -1) { 122 fprintf(stderr, "%s: Cannot set mode: ", argv[0]); 123 perror(NULL); 124 exit(EXIT_FAILURE); 125 } 126 #endif 127 ofp = stdout; 128 ofname = "stdout"; 129 needclose ^= 2; 130 } 131 132 /* Get the file size. */ 133 if(LFS_FSEEK(fp, 0, SEEK_END) == 0) { 134 n = LFS_FTELL(fp); 135 rewind(fp); 136 if(n < 0) { 137 fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname); 138 perror(NULL); 139 exit(EXIT_FAILURE); 140 } 141 if(0x7fffffff <= n) { 142 fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname); 143 exit(EXIT_FAILURE); 144 } 145 } else { 146 fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname); 147 perror(NULL); 148 exit(EXIT_FAILURE); 149 } 150 151 /* Allocate 5blocksize bytes of memory. */ 152 T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t)); 153 SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t)); 154 if((T == NULL) || (SA == NULL)) { 155 fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); 156 exit(EXIT_FAILURE); 157 } 158 159 /* Read n bytes of data. */ 160 if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) { 161 fprintf(stderr, "%s: %s `%s': ", 162 argv[0], 163 (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", 164 fname); 165 perror(NULL); 166 exit(EXIT_FAILURE); 167 } 168 if(needclose & 1) { fclose(fp); } 169 170 /* Construct the suffix array. */ 171 fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n); 172 start = clock(); 173 if(divsufsort(T, SA, (saidx_t)n) != 0) { 174 fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); 175 exit(EXIT_FAILURE); 176 } 177 finish = clock(); 178 fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC); 179 180 /* Write the suffix array. */ 181 if(fwrite(SA, sizeof(saidx_t), (size_t)n, ofp) != (size_t)n) { 182 fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname); 183 perror(NULL); 184 exit(EXIT_FAILURE); 185 } 186 if(needclose & 2) { fclose(ofp); } 187 188 /* Deallocate memory. */ 189 free(SA); 190 free(T); 191 192 return 0; 193 } 194