Home | History | Annotate | Download | only in bsdiff
      1 // Copyright 2016 Google Inc. All rights reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 package com.google.archivepatcher.applier.bsdiff;
     16 
     17 import com.google.archivepatcher.applier.PatchFormatException;
     18 
     19 import java.io.BufferedInputStream;
     20 import java.io.BufferedOutputStream;
     21 import java.io.IOException;
     22 import java.io.InputStream;
     23 import java.io.OutputStream;
     24 import java.io.RandomAccessFile;
     25 
     26 /**
     27  * A Java implementation of the "bspatch" algorithm based on the BSD-2 licensed source code
     28  * available here: https://github.com/mendsley/bsdiff. This implementation supports a maximum file
     29  * size of 2GB for all binaries involved (old, new and patch binaries).
     30  */
     31 public class BsPatch {
     32   /**
     33    * Standard header found at the start of every patch.
     34    */
     35   private static final String SIGNATURE = "ENDSLEY/BSDIFF43";
     36 
     37   /**
     38    * Default buffer size is 50 kibibytes, a reasonable tradeoff between size and speed.
     39    */
     40   private static final int PATCH_BUFFER_SIZE = 1024 * 50;
     41 
     42   /**
     43    * Masks the upper bit of a long, used to determine if a long is positive or negative.
     44    */
     45   private static final long NEGATIVE_LONG_SIGN_MASK = 1L << 63;
     46 
     47   /**
     48    * The patch is typically compressed and the input stream is decompressing on-the-fly. A small
     49    * buffer greatly improves efficiency on complicated patches with lots of short directives.
     50    */
     51   private static final int PATCH_STREAM_BUFFER_SIZE = 4 * 1024;
     52 
     53   /**
     54    * Complicated patches with lots of short directives result in many calls to write small amounts
     55    * of data. A buffer greatly improves efficiency for these patches.
     56    */
     57   private static final int OUTPUT_STREAM_BUFFER_SIZE = 16 * 1024;
     58 
     59   /**
     60    * Applies a patch from |patchData| to the data in |oldData|, writing the result to |newData|.
     61    *
     62    * @param oldData data to which the patch should be applied
     63    * @param newData stream to write the new artifact to
     64    * @param patchData stream to read patch instructions from
     65    * @throws PatchFormatException if the patch stream is invalid
     66    * @throws IOException if unable to read or write any of the data
     67    */
     68   public static void applyPatch(
     69       RandomAccessFile oldData, OutputStream newData, InputStream patchData)
     70       throws PatchFormatException, IOException {
     71     patchData = new BufferedInputStream(patchData, PATCH_STREAM_BUFFER_SIZE);
     72     newData = new BufferedOutputStream(newData, OUTPUT_STREAM_BUFFER_SIZE);
     73     try {
     74       applyPatchInternal(oldData, newData, patchData);
     75     } finally {
     76       newData.flush();
     77     }
     78   }
     79 
     80   /**
     81    * Does the work of the public applyPatch method.
     82    */
     83   private static void applyPatchInternal(
     84       final RandomAccessFile oldData,
     85       final OutputStream newData,
     86       final InputStream patchData)
     87       throws PatchFormatException, IOException {
     88     final byte[] signatureBuffer = new byte[SIGNATURE.length()];
     89     try {
     90       readFully(patchData, signatureBuffer, 0, signatureBuffer.length);
     91     } catch (IOException e) {
     92       throw new PatchFormatException("truncated signature");
     93     }
     94 
     95     String signature = new String(signatureBuffer, 0, signatureBuffer.length, "US-ASCII");
     96     if (!SIGNATURE.equals(signature)) {
     97       throw new PatchFormatException("bad signature");
     98     }
     99 
    100     // Sanity-check: ensure a-priori knowledge matches patch expectations
    101     final long oldSize = oldData.length();
    102     if (oldSize > Integer.MAX_VALUE) {
    103       throw new PatchFormatException("bad oldSize");
    104     }
    105     final long newSize = readBsdiffLong(patchData);
    106     if (newSize < 0 || newSize > Integer.MAX_VALUE) {
    107       throw new PatchFormatException("bad newSize");
    108     }
    109 
    110     // These buffers are used for performing transformations and copies. They are not stateful.
    111     final byte[] buffer1 = new byte[PATCH_BUFFER_SIZE];
    112     final byte[] buffer2 = new byte[PATCH_BUFFER_SIZE];
    113 
    114     // Offsets into |oldData| and |newData|.
    115     long oldDataOffset = 0; // strobes |oldData| in order specified by the patch file
    116     long newDataBytesWritten = 0; // monotonically increases from 0 .. |expectedNewSize|
    117 
    118     while (newDataBytesWritten < newSize) {
    119       // Read "control data" for the operation. There are three values here:
    120       // 1. |diffSegmentLength| defines a number of "similar" bytes that can be transformed
    121       //    from |oldData| to |newData| by applying byte-by-byte addends. The addend bytes are
    122       //    read from |patchData|. If zero, no "similar" bytes are transformed in this
    123       //    operation.
    124       final long diffSegmentLength = readBsdiffLong(patchData);
    125 
    126       // 2. |copySegmentLength| defines a number of identical bytes that can be copied from
    127       //    |oldData| to |newData|. If zero, no identical bytes are copied in this operation.
    128       final long copySegmentLength = readBsdiffLong(patchData);
    129 
    130       // 3. |offsetToNextInput| defines a relative offset to the next position in |oldData| to
    131       //    jump do after the current operation completes. Strangely, this compensates for
    132       //    |diffSegmentLength| but not for |copySegmentLength|, so |diffSegmentLength| must
    133       //    be accumulated into |oldDataOffset| while |copySegmentLength| must NOT be.
    134       final long offsetToNextInput = readBsdiffLong(patchData);
    135 
    136       // Sanity-checks
    137       if (diffSegmentLength < 0 || diffSegmentLength > Integer.MAX_VALUE) {
    138         throw new PatchFormatException("bad diffSegmentLength");
    139       }
    140       if (copySegmentLength < 0 || copySegmentLength > Integer.MAX_VALUE) {
    141         throw new PatchFormatException("bad copySegmentLength");
    142       }
    143       if (offsetToNextInput < Integer.MIN_VALUE || offsetToNextInput > Integer.MAX_VALUE) {
    144         throw new PatchFormatException("bad offsetToNextInput");
    145       }
    146 
    147       final long expectedFinalNewDataBytesWritten =
    148           newDataBytesWritten + diffSegmentLength + copySegmentLength;
    149       if (expectedFinalNewDataBytesWritten > newSize) {
    150         throw new PatchFormatException("expectedFinalNewDataBytesWritten too large");
    151       }
    152 
    153       final long expectedFinalOldDataOffset = oldDataOffset + diffSegmentLength + offsetToNextInput;
    154       if (expectedFinalOldDataOffset > oldSize) {
    155         throw new PatchFormatException("expectedFinalOldDataOffset too large");
    156       }
    157       if (expectedFinalOldDataOffset < 0) {
    158         throw new PatchFormatException("expectedFinalOldDataOffset is negative");
    159       }
    160 
    161       // At this point everything is known to be sane, and the operations should all succeed.
    162       oldData.seek(oldDataOffset);
    163       if (diffSegmentLength > 0) {
    164         transformBytes((int) diffSegmentLength, patchData, oldData, newData, buffer1, buffer2);
    165       }
    166       if (copySegmentLength > 0) {
    167         pipe(patchData, newData, buffer1, (int) copySegmentLength);
    168       }
    169       newDataBytesWritten = expectedFinalNewDataBytesWritten;
    170       oldDataOffset = expectedFinalOldDataOffset;
    171     }
    172   }
    173 
    174   /**
    175    * Transforms bytes from |oldData| into |newData| by applying byte-for-byte addends from
    176    * |patchData|. The number of bytes consumed from |oldData| and |patchData|, as well as the
    177    * number of bytes written to |newData|, is |diffLength|. The contents of the buffers are
    178    * ignored and overwritten, and no guarantee is made as to their contents when this method
    179    * returns. This is the core of the bsdiff patching algorithm. |buffer1.length| must equal
    180    * |buffer2.length|, and |buffer1| and |buffer2| must be distinct objects.
    181    *
    182    * @param diffLength the length of the BsDiff entry (how many bytes to read and apply).
    183    * @param patchData the input stream from the BsDiff patch containing diff bytes. This stream
    184    *                  must be positioned so that the first byte read is the first addend to be
    185    *                  applied to the first byte of data to be read from |oldData|.
    186    * @param oldData the old file, for the diff bytes to be applied to. This input source must be
    187    *                positioned so that the first byte read is the first byte of data to which the
    188    *                first byte of addends from |patchData| should be applied.
    189    * @param newData the stream to write the resulting data to.
    190    * @param buffer1 temporary buffer to use for data transformation; contents are ignored, may be
    191    *                overwritten, and are undefined when this method returns.
    192    * @param buffer2 temporary buffer to use for data transformation; contents are ignored, may be
    193    *                overwritten, and are undefined when this method returns.
    194    */
    195   // Visible for testing only
    196   static void transformBytes(
    197       final int diffLength,
    198       final InputStream patchData,
    199       final RandomAccessFile oldData,
    200       final OutputStream newData,
    201       final byte[] buffer1,
    202       final byte[] buffer2)
    203       throws IOException {
    204     int numBytesLeft = diffLength;
    205     while (numBytesLeft > 0) {
    206       final int numBytesThisRound = Math.min(numBytesLeft, buffer1.length);
    207       oldData.readFully(buffer1, 0, numBytesThisRound);
    208       readFully(patchData, buffer2, 0, numBytesThisRound);
    209       for (int i = 0; i < numBytesThisRound; i++) {
    210         buffer1[i] += buffer2[i];
    211       }
    212       newData.write(buffer1, 0, numBytesThisRound);
    213       numBytesLeft -= numBytesThisRound;
    214     }
    215   }
    216 
    217   /**
    218    * Reads a long value in little-endian, signed-magnitude format (the format used by the C++
    219    * bsdiff implementation).
    220    *
    221    * @param in the stream to read from
    222    * @return the long value
    223    * @throws PatchFormatException if the value is negative zero (unsupported)
    224    * @throws IOException if unable to read all 8 bytes from the stream
    225    */
    226   // Visible for testing only
    227   static final long readBsdiffLong(InputStream in) throws PatchFormatException, IOException {
    228     long result = 0;
    229     for (int bitshift = 0; bitshift < 64; bitshift += 8) {
    230       result |= ((long) in.read()) << bitshift;
    231     }
    232 
    233     if (result == NEGATIVE_LONG_SIGN_MASK) {
    234       // "Negative zero", which is valid in signed-magnitude format.
    235       // NB: No sane patch generator should ever produce such a value.
    236       throw new PatchFormatException("read negative zero");
    237     }
    238 
    239     if ((result & NEGATIVE_LONG_SIGN_MASK) != 0) {
    240       result = -(result & ~NEGATIVE_LONG_SIGN_MASK);
    241     }
    242 
    243     return result;
    244   }
    245 
    246   /**
    247    * Read exactly the specified number of bytes into the specified buffer.
    248    *
    249    * @param in the input stream to read from
    250    * @param destination where to write the bytes to
    251    * @param startAt the offset at which to start writing bytes in the destination buffer
    252    * @param numBytes the number of bytes to read
    253    * @throws IOException if reading from the stream fails
    254    */
    255   // Visible for testing only
    256   static void readFully(
    257       final InputStream in, final byte[] destination, final int startAt, final int numBytes)
    258       throws IOException {
    259     int numRead = 0;
    260     while (numRead < numBytes) {
    261       int readNow = in.read(destination, startAt + numRead, numBytes - numRead);
    262       if (readNow == -1) {
    263         throw new IOException("truncated input stream");
    264       }
    265       numRead += readNow;
    266     }
    267   }
    268 
    269   /**
    270    * Use an intermediate buffer to pipe bytes from an InputStream directly to an OutputStream. The
    271    * buffer's contents may be destroyed by this operation.
    272    *
    273    * @param in the stream to read bytes from.
    274    * @param out the stream to write bytes to.
    275    * @param buffer the buffer to use for copying bytes; must have length > 0
    276    * @param copyLength the number of bytes to copy from the input stream to the output stream
    277    */
    278   // Visible for testing only
    279   static void pipe(
    280       final InputStream in, final OutputStream out, final byte[] buffer, int copyLength)
    281       throws IOException {
    282     while (copyLength > 0) {
    283       int maxCopy = Math.min(buffer.length, copyLength);
    284       readFully(in, buffer, 0, maxCopy);
    285       out.write(buffer, 0, maxCopy);
    286       copyLength -= maxCopy;
    287     }
    288   }
    289 }
    290