Home | History | Annotate | Download | only in bzip2
      1 /*-----------------------------------------------------------*/
      2 /*--- Block recoverer program for bzip2                   ---*/
      3 /*---                                      bzip2recover.c ---*/
      4 /*-----------------------------------------------------------*/
      5 
      6 /* ------------------------------------------------------------------
      7    This file is part of bzip2/libbzip2, a program and library for
      8    lossless, block-sorting data compression.
      9 
     10    bzip2/libbzip2 version 1.0.6 of 6 September 2010
     11    Copyright (C) 1996-2010 Julian Seward <jseward (at) bzip.org>
     12 
     13    Please read the WARNING, DISCLAIMER and PATENTS sections in the
     14    README file.
     15 
     16    This program is released under the terms of the license contained
     17    in the file LICENSE.
     18    ------------------------------------------------------------------ */
     19 
     20 /* This program is a complete hack and should be rewritten properly.
     21 	 It isn't very complicated. */
     22 
     23 #include <stdio.h>
     24 #include <errno.h>
     25 #include <stdlib.h>
     26 #include <string.h>
     27 
     28 
     29 /* This program records bit locations in the file to be recovered.
     30    That means that if 64-bit ints are not supported, we will not
     31    be able to recover .bz2 files over 512MB (2^32 bits) long.
     32    On GNU supported platforms, we take advantage of the 64-bit
     33    int support to circumvent this problem.  Ditto MSVC.
     34 
     35    This change occurred in version 1.0.2; all prior versions have
     36    the 512MB limitation.
     37 */
     38 #ifdef __GNUC__
     39    typedef  unsigned long long int  MaybeUInt64;
     40 #  define MaybeUInt64_FMT "%Lu"
     41 #else
     42 #ifdef _MSC_VER
     43    typedef  unsigned __int64  MaybeUInt64;
     44 #  define MaybeUInt64_FMT "%I64u"
     45 #else
     46    typedef  unsigned int   MaybeUInt64;
     47 #  define MaybeUInt64_FMT "%u"
     48 #endif
     49 #endif
     50 
     51 typedef  unsigned int   UInt32;
     52 typedef  int            Int32;
     53 typedef  unsigned char  UChar;
     54 typedef  char           Char;
     55 typedef  unsigned char  Bool;
     56 #define True    ((Bool)1)
     57 #define False   ((Bool)0)
     58 
     59 
     60 #define BZ_MAX_FILENAME 2000
     61 
     62 Char inFileName[BZ_MAX_FILENAME];
     63 Char outFileName[BZ_MAX_FILENAME];
     64 Char progName[BZ_MAX_FILENAME];
     65 
     66 MaybeUInt64 bytesOut = 0;
     67 MaybeUInt64 bytesIn  = 0;
     68 
     69 
     70 /*---------------------------------------------------*/
     71 /*--- Header bytes                                ---*/
     72 /*---------------------------------------------------*/
     73 
     74 #define BZ_HDR_B 0x42                         /* 'B' */
     75 #define BZ_HDR_Z 0x5a                         /* 'Z' */
     76 #define BZ_HDR_h 0x68                         /* 'h' */
     77 #define BZ_HDR_0 0x30                         /* '0' */
     78 
     79 
     80 /*---------------------------------------------------*/
     81 /*--- I/O errors                                  ---*/
     82 /*---------------------------------------------------*/
     83 
     84 /*---------------------------------------------*/
     85 static void readError ( void )
     86 {
     87    fprintf ( stderr,
     88              "%s: I/O error reading `%s', possible reason follows.\n",
     89             progName, inFileName );
     90    perror ( progName );
     91    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
     92              progName );
     93    exit ( 1 );
     94 }
     95 
     96 
     97 /*---------------------------------------------*/
     98 static void writeError ( void )
     99 {
    100    fprintf ( stderr,
    101              "%s: I/O error reading `%s', possible reason follows.\n",
    102             progName, inFileName );
    103    perror ( progName );
    104    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
    105              progName );
    106    exit ( 1 );
    107 }
    108 
    109 
    110 /*---------------------------------------------*/
    111 static void mallocFail ( Int32 n )
    112 {
    113    fprintf ( stderr,
    114              "%s: malloc failed on request for %d bytes.\n",
    115             progName, n );
    116    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
    117              progName );
    118    exit ( 1 );
    119 }
    120 
    121 
    122 /*---------------------------------------------*/
    123 static void tooManyBlocks ( Int32 max_handled_blocks )
    124 {
    125    fprintf ( stderr,
    126              "%s: `%s' appears to contain more than %d blocks\n",
    127             progName, inFileName, max_handled_blocks );
    128    fprintf ( stderr,
    129              "%s: and cannot be handled.  To fix, increase\n",
    130              progName );
    131    fprintf ( stderr,
    132              "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
    133              progName );
    134    exit ( 1 );
    135 }
    136 
    137 
    138 
    139 /*---------------------------------------------------*/
    140 /*--- Bit stream I/O                              ---*/
    141 /*---------------------------------------------------*/
    142 
    143 typedef
    144    struct {
    145       FILE*  handle;
    146       Int32  buffer;
    147       Int32  buffLive;
    148       Char   mode;
    149    }
    150    BitStream;
    151 
    152 
    153 /*---------------------------------------------*/
    154 static BitStream* bsOpenReadStream ( FILE* stream )
    155 {
    156    BitStream *bs = malloc ( sizeof(BitStream) );
    157    if (bs == NULL) mallocFail ( sizeof(BitStream) );
    158    bs->handle = stream;
    159    bs->buffer = 0;
    160    bs->buffLive = 0;
    161    bs->mode = 'r';
    162    return bs;
    163 }
    164 
    165 
    166 /*---------------------------------------------*/
    167 static BitStream* bsOpenWriteStream ( FILE* stream )
    168 {
    169    BitStream *bs = malloc ( sizeof(BitStream) );
    170    if (bs == NULL) mallocFail ( sizeof(BitStream) );
    171    bs->handle = stream;
    172    bs->buffer = 0;
    173    bs->buffLive = 0;
    174    bs->mode = 'w';
    175    return bs;
    176 }
    177 
    178 
    179 /*---------------------------------------------*/
    180 static void bsPutBit ( BitStream* bs, Int32 bit )
    181 {
    182    if (bs->buffLive == 8) {
    183       Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
    184       if (retVal == EOF) writeError();
    185       bytesOut++;
    186       bs->buffLive = 1;
    187       bs->buffer = bit & 0x1;
    188    } else {
    189       bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
    190       bs->buffLive++;
    191    };
    192 }
    193 
    194 
    195 /*---------------------------------------------*/
    196 /*--
    197    Returns 0 or 1, or 2 to indicate EOF.
    198 --*/
    199 static Int32 bsGetBit ( BitStream* bs )
    200 {
    201    if (bs->buffLive > 0) {
    202       bs->buffLive --;
    203       return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
    204    } else {
    205       Int32 retVal = getc ( bs->handle );
    206       if ( retVal == EOF ) {
    207          if (errno != 0) readError();
    208          return 2;
    209       }
    210       bs->buffLive = 7;
    211       bs->buffer = retVal;
    212       return ( ((bs->buffer) >> 7) & 0x1 );
    213    }
    214 }
    215 
    216 
    217 /*---------------------------------------------*/
    218 static void bsClose ( BitStream* bs )
    219 {
    220    Int32 retVal;
    221 
    222    if ( bs->mode == 'w' ) {
    223       while ( bs->buffLive < 8 ) {
    224          bs->buffLive++;
    225          bs->buffer <<= 1;
    226       };
    227       retVal = putc ( (UChar) (bs->buffer), bs->handle );
    228       if (retVal == EOF) writeError();
    229       bytesOut++;
    230       retVal = fflush ( bs->handle );
    231       if (retVal == EOF) writeError();
    232    }
    233    retVal = fclose ( bs->handle );
    234    if (retVal == EOF) {
    235       if (bs->mode == 'w') writeError(); else readError();
    236    }
    237    free ( bs );
    238 }
    239 
    240 
    241 /*---------------------------------------------*/
    242 static void bsPutUChar ( BitStream* bs, UChar c )
    243 {
    244    Int32 i;
    245    for (i = 7; i >= 0; i--)
    246       bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
    247 }
    248 
    249 
    250 /*---------------------------------------------*/
    251 static void bsPutUInt32 ( BitStream* bs, UInt32 c )
    252 {
    253    Int32 i;
    254 
    255    for (i = 31; i >= 0; i--)
    256       bsPutBit ( bs, (c >> i) & 0x1 );
    257 }
    258 
    259 
    260 /*---------------------------------------------*/
    261 static Bool endsInBz2 ( Char* name )
    262 {
    263    Int32 n = strlen ( name );
    264    if (n <= 4) return False;
    265    return
    266       (name[n-4] == '.' &&
    267        name[n-3] == 'b' &&
    268        name[n-2] == 'z' &&
    269        name[n-1] == '2');
    270 }
    271 
    272 
    273 /*---------------------------------------------------*/
    274 /*---                                             ---*/
    275 /*---------------------------------------------------*/
    276 
    277 /* This logic isn't really right when it comes to Cygwin. */
    278 #ifdef _WIN32
    279 #  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
    280 #else
    281 #  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
    282 #endif
    283 
    284 #define BLOCK_HEADER_HI  0x00003141UL
    285 #define BLOCK_HEADER_LO  0x59265359UL
    286 
    287 #define BLOCK_ENDMARK_HI 0x00001772UL
    288 #define BLOCK_ENDMARK_LO 0x45385090UL
    289 
    290 /* Increase if necessary.  However, a .bz2 file with > 50000 blocks
    291    would have an uncompressed size of at least 40GB, so the chances
    292    are low you'll need to up this.
    293 */
    294 #define BZ_MAX_HANDLED_BLOCKS 50000
    295 
    296 MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
    297 MaybeUInt64 bEnd   [BZ_MAX_HANDLED_BLOCKS];
    298 MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
    299 MaybeUInt64 rbEnd  [BZ_MAX_HANDLED_BLOCKS];
    300 
    301 Int32 main ( Int32 argc, Char** argv )
    302 {
    303    FILE*       inFile;
    304    FILE*       outFile;
    305    BitStream*  bsIn, *bsWr;
    306    Int32       b, wrBlock, currBlock, rbCtr;
    307    MaybeUInt64 bitsRead;
    308 
    309    UInt32      buffHi, buffLo, blockCRC;
    310    Char*       p;
    311 
    312    strcpy ( progName, argv[0] );
    313    inFileName[0] = outFileName[0] = 0;
    314 
    315    fprintf ( stderr,
    316              "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n" );
    317 
    318    if (argc != 2) {
    319       fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
    320                         progName, progName );
    321       switch (sizeof(MaybeUInt64)) {
    322          case 8:
    323             fprintf(stderr,
    324                     "\trestrictions on size of recovered file: None\n");
    325             break;
    326          case 4:
    327             fprintf(stderr,
    328                     "\trestrictions on size of recovered file: 512 MB\n");
    329             fprintf(stderr,
    330                     "\tto circumvent, recompile with MaybeUInt64 as an\n"
    331                     "\tunsigned 64-bit int.\n");
    332             break;
    333          default:
    334             fprintf(stderr,
    335                     "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
    336                     "configuration error.\n");
    337             break;
    338       }
    339       exit(1);
    340    }
    341 
    342    if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
    343       fprintf ( stderr,
    344                 "%s: supplied filename is suspiciously (>= %d chars) long.  Bye!\n",
    345                 progName, (int)strlen(argv[1]) );
    346       exit(1);
    347    }
    348 
    349    strcpy ( inFileName, argv[1] );
    350 
    351    inFile = fopen ( inFileName, "rb" );
    352    if (inFile == NULL) {
    353       fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
    354       exit(1);
    355    }
    356 
    357    bsIn = bsOpenReadStream ( inFile );
    358    fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
    359 
    360    bitsRead = 0;
    361    buffHi = buffLo = 0;
    362    currBlock = 0;
    363    bStart[currBlock] = 0;
    364 
    365    rbCtr = 0;
    366 
    367    while (True) {
    368       b = bsGetBit ( bsIn );
    369       bitsRead++;
    370       if (b == 2) {
    371          if (bitsRead >= bStart[currBlock] &&
    372             (bitsRead - bStart[currBlock]) >= 40) {
    373             bEnd[currBlock] = bitsRead-1;
    374             if (currBlock > 0)
    375                fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
    376                                  " to " MaybeUInt64_FMT " (incomplete)\n",
    377                          currBlock,  bStart[currBlock], bEnd[currBlock] );
    378          } else
    379             currBlock--;
    380          break;
    381       }
    382       buffHi = (buffHi << 1) | (buffLo >> 31);
    383       buffLo = (buffLo << 1) | (b & 1);
    384       if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
    385              && buffLo == BLOCK_HEADER_LO)
    386            ||
    387            ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
    388              && buffLo == BLOCK_ENDMARK_LO)
    389          ) {
    390          if (bitsRead > 49) {
    391             bEnd[currBlock] = bitsRead-49;
    392          } else {
    393             bEnd[currBlock] = 0;
    394          }
    395          if (currBlock > 0 &&
    396 	     (bEnd[currBlock] - bStart[currBlock]) >= 130) {
    397             fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
    398                               " to " MaybeUInt64_FMT "\n",
    399                       rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
    400             rbStart[rbCtr] = bStart[currBlock];
    401             rbEnd[rbCtr] = bEnd[currBlock];
    402             rbCtr++;
    403          }
    404          if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
    405             tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
    406          currBlock++;
    407 
    408          bStart[currBlock] = bitsRead;
    409       }
    410    }
    411 
    412    bsClose ( bsIn );
    413 
    414    /*-- identified blocks run from 1 to rbCtr inclusive. --*/
    415 
    416    if (rbCtr < 1) {
    417       fprintf ( stderr,
    418                 "%s: sorry, I couldn't find any block boundaries.\n",
    419                 progName );
    420       exit(1);
    421    };
    422 
    423    fprintf ( stderr, "%s: splitting into blocks\n", progName );
    424 
    425    inFile = fopen ( inFileName, "rb" );
    426    if (inFile == NULL) {
    427       fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
    428       exit(1);
    429    }
    430    bsIn = bsOpenReadStream ( inFile );
    431 
    432    /*-- placate gcc's dataflow analyser --*/
    433    blockCRC = 0; bsWr = 0;
    434 
    435    bitsRead = 0;
    436    outFile = NULL;
    437    wrBlock = 0;
    438    while (True) {
    439       b = bsGetBit(bsIn);
    440       if (b == 2) break;
    441       buffHi = (buffHi << 1) | (buffLo >> 31);
    442       buffLo = (buffLo << 1) | (b & 1);
    443       if (bitsRead == 47+rbStart[wrBlock])
    444          blockCRC = (buffHi << 16) | (buffLo >> 16);
    445 
    446       if (outFile != NULL && bitsRead >= rbStart[wrBlock]
    447                           && bitsRead <= rbEnd[wrBlock]) {
    448          bsPutBit ( bsWr, b );
    449       }
    450 
    451       bitsRead++;
    452 
    453       if (bitsRead == rbEnd[wrBlock]+1) {
    454          if (outFile != NULL) {
    455             bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
    456             bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
    457             bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
    458             bsPutUInt32 ( bsWr, blockCRC );
    459             bsClose ( bsWr );
    460          }
    461          if (wrBlock >= rbCtr) break;
    462          wrBlock++;
    463       } else
    464       if (bitsRead == rbStart[wrBlock]) {
    465          /* Create the output file name, correctly handling leading paths.
    466             (31.10.2001 by Sergey E. Kusikov) */
    467          Char* split;
    468          Int32 ofs, k;
    469          for (k = 0; k < BZ_MAX_FILENAME; k++)
    470             outFileName[k] = 0;
    471          strcpy (outFileName, inFileName);
    472          split = strrchr (outFileName, BZ_SPLIT_SYM);
    473          if (split == NULL) {
    474             split = outFileName;
    475          } else {
    476             ++split;
    477 	 }
    478 	 /* Now split points to the start of the basename. */
    479          ofs  = split - outFileName;
    480          sprintf (split, "rec%5d", wrBlock+1);
    481          for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
    482          strcat (outFileName, inFileName + ofs);
    483 
    484          if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
    485 
    486          fprintf ( stderr, "   writing block %d to `%s' ...\n",
    487                            wrBlock+1, outFileName );
    488 
    489          outFile = fopen ( outFileName, "wb" );
    490          if (outFile == NULL) {
    491             fprintf ( stderr, "%s: can't write `%s'\n",
    492                       progName, outFileName );
    493             exit(1);
    494          }
    495          bsWr = bsOpenWriteStream ( outFile );
    496          bsPutUChar ( bsWr, BZ_HDR_B );
    497          bsPutUChar ( bsWr, BZ_HDR_Z );
    498          bsPutUChar ( bsWr, BZ_HDR_h );
    499          bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
    500          bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
    501          bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
    502          bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
    503       }
    504    }
    505 
    506    fprintf ( stderr, "%s: finished\n", progName );
    507    return 0;
    508 }
    509 
    510 
    511 
    512 /*-----------------------------------------------------------*/
    513 /*--- end                                  bzip2recover.c ---*/
    514 /*-----------------------------------------------------------*/
    515