Home | History | Annotate | Download | only in programs
      1 /*
      2     datagen.c - compressible data generator test tool
      3     Copyright (C) Yann Collet 2012-2015
      4 
      5     GPL v2 License
      6 
      7     This program is free software; you can redistribute it and/or modify
      8     it under the terms of the GNU General Public License as published by
      9     the Free Software Foundation; either version 2 of the License, or
     10     (at your option) any later version.
     11 
     12     This program is distributed in the hope that it will be useful,
     13     but WITHOUT ANY WARRANTY; without even the implied warranty of
     14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15     GNU General Public License for more details.
     16 
     17     You should have received a copy of the GNU General Public License along
     18     with this program; if not, write to the Free Software Foundation, Inc.,
     19     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
     20 
     21     You can contact the author at :
     22    - LZ4 source repository : http://code.google.com/p/lz4
     23    - LZ4 source mirror : https://github.com/Cyan4973/lz4
     24    - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
     25 */
     26 
     27 /**************************************
     28  Remove Visual warning messages
     29 **************************************/
     30 #define _CRT_SECURE_NO_WARNINGS   // fgets
     31 
     32 
     33 /**************************************
     34  Includes
     35 **************************************/
     36 #include <stdio.h>      // fgets, sscanf
     37 #include <string.h>     // strcmp
     38 
     39 
     40 /**************************************
     41    Basic Types
     42 **************************************/
     43 #if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
     44 # include <stdint.h>
     45   typedef  uint8_t BYTE;
     46   typedef uint16_t U16;
     47   typedef uint32_t U32;
     48   typedef  int32_t S32;
     49   typedef uint64_t U64;
     50 #else
     51   typedef unsigned char       BYTE;
     52   typedef unsigned short      U16;
     53   typedef unsigned int        U32;
     54   typedef   signed int        S32;
     55   typedef unsigned long long  U64;
     56 #endif
     57 
     58 
     59 /**************************************
     60  Constants
     61 **************************************/
     62 #ifndef LZ4_VERSION
     63 #  define LZ4_VERSION "r125"
     64 #endif
     65 
     66 #define KB *(1 <<10)
     67 #define MB *(1 <<20)
     68 #define GB *(1U<<30)
     69 
     70 #define CDG_SIZE_DEFAULT (64 KB)
     71 #define CDG_SEED_DEFAULT 0
     72 #define CDG_COMPRESSIBILITY_DEFAULT 50
     73 #define PRIME1   2654435761U
     74 #define PRIME2   2246822519U
     75 
     76 
     77 /**************************************
     78   Macros
     79 **************************************/
     80 #define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
     81 #define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
     82 
     83 
     84 /**************************************
     85   Local Parameters
     86 **************************************/
     87 static unsigned no_prompt = 0;
     88 static char*    programName;
     89 static unsigned displayLevel = 2;
     90 
     91 
     92 /*********************************************************
     93   functions
     94 *********************************************************/
     95 
     96 #define CDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
     97 static unsigned int CDG_rand(U32* src)
     98 {
     99     U32 rand32 = *src;
    100     rand32 *= PRIME1;
    101     rand32 += PRIME2;
    102     rand32  = CDG_rotl32(rand32, 13);
    103     *src = rand32;
    104     return rand32;
    105 }
    106 
    107 
    108 #define CDG_RAND15BITS  ((CDG_rand(seed) >> 3) & 32767)
    109 #define CDG_RANDLENGTH  ( ((CDG_rand(seed) >> 7) & 3) ? (CDG_rand(seed) % 14) : (CDG_rand(seed) & 511) + 15)
    110 #define CDG_RANDCHAR    (((CDG_rand(seed) >> 9) & 63) + '0')
    111 static void CDG_generate(U64 size, U32* seed, double proba)
    112 {
    113     BYTE fullbuff[32 KB + 128 KB + 1];
    114     BYTE* buff = fullbuff + 32 KB;
    115     U64 total=0;
    116     U32 P32 = (U32)(32768 * proba);
    117     U32 pos=1;
    118     U32 genBlockSize = 128 KB;
    119 
    120     // Build initial prefix
    121     fullbuff[0] = CDG_RANDCHAR;
    122     while (pos<32 KB)
    123     {
    124         // Select : Literal (char) or Match (within 32K)
    125         if (CDG_RAND15BITS < P32)
    126         {
    127             // Copy (within 64K)
    128             U32 d;
    129             int ref;
    130             int length = CDG_RANDLENGTH + 4;
    131             U32 offset = CDG_RAND15BITS + 1;
    132             if (offset > pos) offset = pos;
    133             ref = pos - offset;
    134             d = pos + length;
    135             while (pos < d) fullbuff[pos++] = fullbuff[ref++];
    136         }
    137         else
    138         {
    139             // Literal (noise)
    140             U32 d = pos + CDG_RANDLENGTH;
    141             while (pos < d) fullbuff[pos++] = CDG_RANDCHAR;
    142         }
    143     }
    144 
    145     // Generate compressible data
    146     pos = 0;
    147     while (total < size)
    148     {
    149         if (size-total < 128 KB) genBlockSize = (U32)(size-total);
    150         total += genBlockSize;
    151         buff[genBlockSize] = 0;
    152         pos = 0;
    153         while (pos<genBlockSize)
    154         {
    155             // Select : Literal (char) or Match (within 32K)
    156             if (CDG_RAND15BITS < P32)
    157             {
    158                 // Copy (within 64K)
    159                 int ref;
    160                 U32 d;
    161                 int length = CDG_RANDLENGTH + 4;
    162                 U32 offset = CDG_RAND15BITS + 1;
    163                 if (pos + length > genBlockSize ) length = genBlockSize - pos;
    164                 ref = pos - offset;
    165                 d = pos + length;
    166                 while (pos < d) buff[pos++] = buff[ref++];
    167             }
    168             else
    169             {
    170                 // Literal (noise)
    171                 U32 d;
    172                 int length = CDG_RANDLENGTH;
    173                 if (pos + length > genBlockSize) length = genBlockSize - pos;
    174                 d = pos + length;
    175                 while (pos < d) buff[pos++] = CDG_RANDCHAR;
    176             }
    177         }
    178         // output datagen
    179         pos=0;
    180         for (;pos+512<=genBlockSize;pos+=512)
    181             printf("%512.512s", buff+pos);
    182         for (;pos<genBlockSize;pos++) printf("%c", buff[pos]);
    183         // Regenerate prefix
    184         memcpy(fullbuff, buff + 96 KB, 32 KB);
    185     }
    186 }
    187 
    188 
    189 int CDG_usage(void)
    190 {
    191     DISPLAY( "Compressible data generator\n");
    192     DISPLAY( "Usage :\n");
    193     DISPLAY( "      %s [size] [args]\n", programName);
    194     DISPLAY( "\n");
    195     DISPLAY( "Arguments :\n");
    196     DISPLAY( " -g#    : generate # data (default:%i)\n", CDG_SIZE_DEFAULT);
    197     DISPLAY( " -s#    : Select seed (default:%i)\n", CDG_SEED_DEFAULT);
    198     DISPLAY( " -p#    : Select compressibility in %% (default:%i%%)\n", CDG_COMPRESSIBILITY_DEFAULT);
    199     DISPLAY( " -h     : display help and exit\n");
    200     return 0;
    201 }
    202 
    203 
    204 int main(int argc, char** argv)
    205 {
    206     int argNb;
    207     int proba = CDG_COMPRESSIBILITY_DEFAULT;
    208     U64 size = CDG_SIZE_DEFAULT;
    209     U32 seed = CDG_SEED_DEFAULT;
    210 
    211     // Check command line
    212     programName = argv[0];
    213     for(argNb=1; argNb<argc; argNb++)
    214     {
    215         char* argument = argv[argNb];
    216 
    217         if(!argument) continue;   // Protection if argument empty
    218 
    219         // Decode command (note : aggregated commands are allowed)
    220         if (*argument=='-')
    221         {
    222             if (!strcmp(argument, "--no-prompt")) { no_prompt=1; continue; }
    223 
    224             argument++;
    225             while (*argument!=0)
    226             {
    227                 switch(*argument)
    228                 {
    229                 case 'h':
    230                     return CDG_usage();
    231                 case 'g':
    232                     argument++;
    233                     size=0;
    234                     while ((*argument>='0') && (*argument<='9'))
    235                     {
    236                         size *= 10;
    237                         size += *argument - '0';
    238                         argument++;
    239                     }
    240                     if (*argument=='K') { size <<= 10; argument++; }
    241                     if (*argument=='M') { size <<= 20; argument++; }
    242                     if (*argument=='G') { size <<= 30; argument++; }
    243                     if (*argument=='B') { argument++; }
    244                     break;
    245                 case 's':
    246                     argument++;
    247                     seed=0;
    248                     while ((*argument>='0') && (*argument<='9'))
    249                     {
    250                         seed *= 10;
    251                         seed += *argument - '0';
    252                         argument++;
    253                     }
    254                     break;
    255                 case 'p':
    256                     argument++;
    257                     proba=0;
    258                     while ((*argument>='0') && (*argument<='9'))
    259                     {
    260                         proba *= 10;
    261                         proba += *argument - '0';
    262                         argument++;
    263                     }
    264                     if (proba<0) proba=0;
    265                     if (proba>100) proba=100;
    266                     break;
    267                 case 'v':
    268                     displayLevel = 4;
    269                     argument++;
    270                     break;
    271                 default: ;
    272                 }
    273             }
    274 
    275         }
    276     }
    277 
    278     // Get Seed
    279     DISPLAYLEVEL(4, "Data Generator %s \n", LZ4_VERSION);
    280     DISPLAYLEVEL(3, "Seed = %u \n", seed);
    281     if (proba!=CDG_COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", proba);
    282 
    283     CDG_generate(size, &seed, ((double)proba) / 100);
    284 
    285     return 0;
    286 }
    287