1 #include <cstddef> 2 #include <cstdio> 3 #include <cstring> 4 #include <fstream> 5 #include <vector> 6 7 #include "./deorummolae.h" 8 #include "./durchschlag.h" 9 #include "./sieve.h" 10 11 #define METHOD_DM 0 12 #define METHOD_SIEVE 1 13 #define METHOD_DURCHSCHLAG 2 14 #define METHOD_DISTILL 3 15 #define METHOD_PURIFY 4 16 17 static size_t readInt(const char* str) { 18 size_t result = 0; 19 if (str[0] == 0 || str[0] == '0') { 20 return 0; 21 } 22 for (size_t i = 0; i < 13; ++i) { 23 if (str[i] == 0) { 24 return result; 25 } 26 if (str[i] == 'k' || str[i] == 'K') { 27 if ((str[i + 1] == 0) && ((result << 10) > result)) { 28 return result << 10; 29 } 30 return 0; 31 } 32 if (str[i] == 'm' || str[i] == 'M') { 33 if ((str[i + 1] == 0) && ((result << 20) > result)) { 34 return result << 20; 35 } 36 return 0; 37 } 38 if (str[i] < '0' || str[i] > '9') { 39 return 0; 40 } 41 size_t next = (10 * result) + (str[i] - '0'); 42 if (next <= result) { 43 return 0; 44 } 45 result = next; 46 } 47 return 0; 48 } 49 50 static std::string readFile(const std::string& path) { 51 std::ifstream file(path); 52 std::string content( 53 (std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>()); 54 return content; 55 } 56 57 static void writeFile(const char* file, const std::string& content) { 58 std::ofstream outfile(file, std::ofstream::binary); 59 outfile.write(content.c_str(), static_cast<std::streamsize>(content.size())); 60 outfile.close(); 61 } 62 63 static void writeSamples(char const* argv[], const std::vector<int>& pathArgs, 64 const std::vector<size_t>& sizes, const uint8_t* data) { 65 size_t offset = 0; 66 for (size_t i = 0; i < pathArgs.size(); ++i) { 67 int j = pathArgs[i]; 68 const char* file = argv[j]; 69 size_t sampleSize = sizes[i]; 70 std::ofstream outfile(file, std::ofstream::binary); 71 outfile.write(reinterpret_cast<const char*>(data + offset), 72 static_cast<std::streamsize>(sampleSize)); 73 outfile.close(); 74 offset += sampleSize; 75 } 76 } 77 78 /* Returns "base file name" or its tail, if it contains '/' or '\'. */ 79 static const char* fileName(const char* path) { 80 const char* separator_position = strrchr(path, '/'); 81 if (separator_position) path = separator_position + 1; 82 separator_position = strrchr(path, '\\'); 83 if (separator_position) path = separator_position + 1; 84 return path; 85 } 86 87 static void printHelp(const char* name) { 88 fprintf(stderr, "Usage: %s [OPTION]... DICTIONARY [SAMPLE]...\n", name); 89 fprintf(stderr, 90 "Options:\n" 91 " --dm use 'deorummolae' engine\n" 92 " --distill rewrite samples; unique text parts are removed\n" 93 " --dsh use 'durchschlag' engine (default)\n" 94 " --purify rewrite samples; unique text parts are zeroed out\n" 95 " --sieve use 'sieve' engine\n" 96 " -b# set block length for 'durchschlag'; default: 1024\n" 97 " -s# set slice length for 'distill', 'durchschlag', 'purify'\n" 98 " and 'sieve'; default: 16\n" 99 " -t# set target dictionary size (limit); default: 16K\n" 100 " -u# set minimum slice population (for rewrites); default: 2\n" 101 "# is a decimal number with optional k/K/m/M suffix.\n" 102 "WARNING: 'distill' and 'purify' will overwrite original samples!\n" 103 " Completely unique samples might become empty files.\n\n"); 104 } 105 106 int main(int argc, char const* argv[]) { 107 int dictionaryArg = -1; 108 int method = METHOD_DURCHSCHLAG; 109 size_t sliceLen = 16; 110 size_t targetSize = 16 << 10; 111 size_t blockSize = 1024; 112 size_t minimumPopulation = 2; 113 114 std::vector<uint8_t> data; 115 std::vector<size_t> sizes; 116 std::vector<int> pathArgs; 117 size_t total = 0; 118 for (int i = 1; i < argc; ++i) { 119 if (argv[i] == nullptr) { 120 continue; 121 } 122 if (argv[i][0] == '-') { 123 if (argv[i][1] == '-') { 124 if (dictionaryArg != -1) { 125 fprintf(stderr, 126 "Method should be specified before dictionary / sample '%s'\n", 127 argv[i]); 128 exit(1); 129 } 130 if (std::strcmp("--sieve", argv[i]) == 0) { 131 method = METHOD_SIEVE; 132 continue; 133 } 134 if (std::strcmp("--dm", argv[i]) == 0) { 135 method = METHOD_DM; 136 continue; 137 } 138 if (std::strcmp("--dsh", argv[i]) == 0) { 139 method = METHOD_DURCHSCHLAG; 140 continue; 141 } 142 if (std::strcmp("--distill", argv[i]) == 0) { 143 method = METHOD_DISTILL; 144 continue; 145 } 146 if (std::strcmp("--purify", argv[i]) == 0) { 147 method = METHOD_PURIFY; 148 continue; 149 } 150 printHelp(fileName(argv[0])); 151 fprintf(stderr, "Invalid option '%s'\n", argv[i]); 152 exit(1); 153 } 154 if (argv[i][1] == 'b') { 155 blockSize = readInt(&argv[i][2]); 156 if (blockSize < 16 || blockSize > 65536) { 157 printHelp(fileName(argv[0])); 158 fprintf(stderr, "Invalid option '%s'\n", argv[i]); 159 exit(1); 160 } 161 } else if (argv[i][1] == 's') { 162 sliceLen = readInt(&argv[i][2]); 163 if (sliceLen < 4 || sliceLen > 256) { 164 printHelp(fileName(argv[0])); 165 fprintf(stderr, "Invalid option '%s'\n", argv[i]); 166 exit(1); 167 } 168 } else if (argv[i][1] == 't') { 169 targetSize = readInt(&argv[i][2]); 170 if (targetSize < 256 || targetSize > (1 << 25)) { 171 printHelp(fileName(argv[0])); 172 fprintf(stderr, "Invalid option '%s'\n", argv[i]); 173 exit(1); 174 } 175 } else if (argv[i][1] == 'u') { 176 minimumPopulation = readInt(&argv[i][2]); 177 if (minimumPopulation < 256 || minimumPopulation > 65536) { 178 printHelp(fileName(argv[0])); 179 fprintf(stderr, "Invalid option '%s'\n", argv[i]); 180 exit(1); 181 } 182 } else { 183 printHelp(fileName(argv[0])); 184 fprintf(stderr, "Unrecognized option '%s'\n", argv[i]); 185 exit(1); 186 } 187 continue; 188 } 189 if (dictionaryArg == -1) { 190 if (method != METHOD_DISTILL && method != METHOD_PURIFY) { 191 dictionaryArg = i; 192 continue; 193 } 194 } 195 std::string content = readFile(argv[i]); 196 data.insert(data.end(), content.begin(), content.end()); 197 total += content.size(); 198 pathArgs.push_back(i); 199 sizes.push_back(content.size()); 200 } 201 bool wantDictionary = (dictionaryArg == -1); 202 if (method == METHOD_DISTILL || method == METHOD_PURIFY) { 203 wantDictionary = false; 204 } 205 if (wantDictionary || total == 0) { 206 printHelp(fileName(argv[0])); 207 fprintf(stderr, "Not enough arguments\n"); 208 exit(1); 209 } 210 211 if (method == METHOD_SIEVE) { 212 writeFile(argv[dictionaryArg], sieve_generate( 213 targetSize, sliceLen, sizes, data.data())); 214 } else if (method == METHOD_DM) { 215 writeFile(argv[dictionaryArg], DM_generate( 216 targetSize, sizes, data.data())); 217 } else if (method == METHOD_DURCHSCHLAG) { 218 writeFile(argv[dictionaryArg], durchschlag_generate( 219 targetSize, sliceLen, blockSize, sizes, data.data())); 220 } else if (method == METHOD_DISTILL) { 221 durchschlag_distill(sliceLen, minimumPopulation, &sizes, data.data()); 222 writeSamples(argv, pathArgs, sizes, data.data()); 223 } else if (method == METHOD_PURIFY) { 224 durchschlag_purify(sliceLen, minimumPopulation, sizes, data.data()); 225 writeSamples(argv, pathArgs, sizes, data.data()); 226 } else { 227 printHelp(fileName(argv[0])); 228 fprintf(stderr, "Unknown generator\n"); 229 exit(1); 230 } 231 return 0; 232 } 233