1 // Copyright 2016 Google Inc. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package com.google.archivepatcher.applier.bsdiff; 16 17 import com.google.archivepatcher.applier.PatchFormatException; 18 19 import java.io.BufferedInputStream; 20 import java.io.BufferedOutputStream; 21 import java.io.IOException; 22 import java.io.InputStream; 23 import java.io.OutputStream; 24 import java.io.RandomAccessFile; 25 26 /** 27 * A Java implementation of the "bspatch" algorithm based on the BSD-2 licensed source code 28 * available here: https://github.com/mendsley/bsdiff. This implementation supports a maximum file 29 * size of 2GB for all binaries involved (old, new and patch binaries). 30 */ 31 public class BsPatch { 32 /** 33 * Standard header found at the start of every patch. 34 */ 35 private static final String SIGNATURE = "ENDSLEY/BSDIFF43"; 36 37 /** 38 * Default buffer size is 50 kibibytes, a reasonable tradeoff between size and speed. 39 */ 40 private static final int PATCH_BUFFER_SIZE = 1024 * 50; 41 42 /** 43 * Masks the upper bit of a long, used to determine if a long is positive or negative. 44 */ 45 private static final long NEGATIVE_LONG_SIGN_MASK = 1L << 63; 46 47 /** 48 * The patch is typically compressed and the input stream is decompressing on-the-fly. A small 49 * buffer greatly improves efficiency on complicated patches with lots of short directives. 50 */ 51 private static final int PATCH_STREAM_BUFFER_SIZE = 4 * 1024; 52 53 /** 54 * Complicated patches with lots of short directives result in many calls to write small amounts 55 * of data. A buffer greatly improves efficiency for these patches. 56 */ 57 private static final int OUTPUT_STREAM_BUFFER_SIZE = 16 * 1024; 58 59 /** 60 * Applies a patch from |patchData| to the data in |oldData|, writing the result to |newData|. 61 * 62 * @param oldData data to which the patch should be applied 63 * @param newData stream to write the new artifact to 64 * @param patchData stream to read patch instructions from 65 * @throws PatchFormatException if the patch stream is invalid 66 * @throws IOException if unable to read or write any of the data 67 */ 68 public static void applyPatch( 69 RandomAccessFile oldData, OutputStream newData, InputStream patchData) 70 throws PatchFormatException, IOException { 71 patchData = new BufferedInputStream(patchData, PATCH_STREAM_BUFFER_SIZE); 72 newData = new BufferedOutputStream(newData, OUTPUT_STREAM_BUFFER_SIZE); 73 try { 74 applyPatchInternal(oldData, newData, patchData); 75 } finally { 76 newData.flush(); 77 } 78 } 79 80 /** 81 * Does the work of the public applyPatch method. 82 */ 83 private static void applyPatchInternal( 84 final RandomAccessFile oldData, 85 final OutputStream newData, 86 final InputStream patchData) 87 throws PatchFormatException, IOException { 88 final byte[] signatureBuffer = new byte[SIGNATURE.length()]; 89 try { 90 readFully(patchData, signatureBuffer, 0, signatureBuffer.length); 91 } catch (IOException e) { 92 throw new PatchFormatException("truncated signature"); 93 } 94 95 String signature = new String(signatureBuffer, 0, signatureBuffer.length, "US-ASCII"); 96 if (!SIGNATURE.equals(signature)) { 97 throw new PatchFormatException("bad signature"); 98 } 99 100 // Sanity-check: ensure a-priori knowledge matches patch expectations 101 final long oldSize = oldData.length(); 102 if (oldSize > Integer.MAX_VALUE) { 103 throw new PatchFormatException("bad oldSize"); 104 } 105 final long newSize = readBsdiffLong(patchData); 106 if (newSize < 0 || newSize > Integer.MAX_VALUE) { 107 throw new PatchFormatException("bad newSize"); 108 } 109 110 // These buffers are used for performing transformations and copies. They are not stateful. 111 final byte[] buffer1 = new byte[PATCH_BUFFER_SIZE]; 112 final byte[] buffer2 = new byte[PATCH_BUFFER_SIZE]; 113 114 // Offsets into |oldData| and |newData|. 115 long oldDataOffset = 0; // strobes |oldData| in order specified by the patch file 116 long newDataBytesWritten = 0; // monotonically increases from 0 .. |expectedNewSize| 117 118 while (newDataBytesWritten < newSize) { 119 // Read "control data" for the operation. There are three values here: 120 // 1. |diffSegmentLength| defines a number of "similar" bytes that can be transformed 121 // from |oldData| to |newData| by applying byte-by-byte addends. The addend bytes are 122 // read from |patchData|. If zero, no "similar" bytes are transformed in this 123 // operation. 124 final long diffSegmentLength = readBsdiffLong(patchData); 125 126 // 2. |copySegmentLength| defines a number of identical bytes that can be copied from 127 // |oldData| to |newData|. If zero, no identical bytes are copied in this operation. 128 final long copySegmentLength = readBsdiffLong(patchData); 129 130 // 3. |offsetToNextInput| defines a relative offset to the next position in |oldData| to 131 // jump do after the current operation completes. Strangely, this compensates for 132 // |diffSegmentLength| but not for |copySegmentLength|, so |diffSegmentLength| must 133 // be accumulated into |oldDataOffset| while |copySegmentLength| must NOT be. 134 final long offsetToNextInput = readBsdiffLong(patchData); 135 136 // Sanity-checks 137 if (diffSegmentLength < 0 || diffSegmentLength > Integer.MAX_VALUE) { 138 throw new PatchFormatException("bad diffSegmentLength"); 139 } 140 if (copySegmentLength < 0 || copySegmentLength > Integer.MAX_VALUE) { 141 throw new PatchFormatException("bad copySegmentLength"); 142 } 143 if (offsetToNextInput < Integer.MIN_VALUE || offsetToNextInput > Integer.MAX_VALUE) { 144 throw new PatchFormatException("bad offsetToNextInput"); 145 } 146 147 final long expectedFinalNewDataBytesWritten = 148 newDataBytesWritten + diffSegmentLength + copySegmentLength; 149 if (expectedFinalNewDataBytesWritten > newSize) { 150 throw new PatchFormatException("expectedFinalNewDataBytesWritten too large"); 151 } 152 153 final long expectedFinalOldDataOffset = oldDataOffset + diffSegmentLength + offsetToNextInput; 154 if (expectedFinalOldDataOffset > oldSize) { 155 throw new PatchFormatException("expectedFinalOldDataOffset too large"); 156 } 157 if (expectedFinalOldDataOffset < 0) { 158 throw new PatchFormatException("expectedFinalOldDataOffset is negative"); 159 } 160 161 // At this point everything is known to be sane, and the operations should all succeed. 162 oldData.seek(oldDataOffset); 163 if (diffSegmentLength > 0) { 164 transformBytes((int) diffSegmentLength, patchData, oldData, newData, buffer1, buffer2); 165 } 166 if (copySegmentLength > 0) { 167 pipe(patchData, newData, buffer1, (int) copySegmentLength); 168 } 169 newDataBytesWritten = expectedFinalNewDataBytesWritten; 170 oldDataOffset = expectedFinalOldDataOffset; 171 } 172 } 173 174 /** 175 * Transforms bytes from |oldData| into |newData| by applying byte-for-byte addends from 176 * |patchData|. The number of bytes consumed from |oldData| and |patchData|, as well as the 177 * number of bytes written to |newData|, is |diffLength|. The contents of the buffers are 178 * ignored and overwritten, and no guarantee is made as to their contents when this method 179 * returns. This is the core of the bsdiff patching algorithm. |buffer1.length| must equal 180 * |buffer2.length|, and |buffer1| and |buffer2| must be distinct objects. 181 * 182 * @param diffLength the length of the BsDiff entry (how many bytes to read and apply). 183 * @param patchData the input stream from the BsDiff patch containing diff bytes. This stream 184 * must be positioned so that the first byte read is the first addend to be 185 * applied to the first byte of data to be read from |oldData|. 186 * @param oldData the old file, for the diff bytes to be applied to. This input source must be 187 * positioned so that the first byte read is the first byte of data to which the 188 * first byte of addends from |patchData| should be applied. 189 * @param newData the stream to write the resulting data to. 190 * @param buffer1 temporary buffer to use for data transformation; contents are ignored, may be 191 * overwritten, and are undefined when this method returns. 192 * @param buffer2 temporary buffer to use for data transformation; contents are ignored, may be 193 * overwritten, and are undefined when this method returns. 194 */ 195 // Visible for testing only 196 static void transformBytes( 197 final int diffLength, 198 final InputStream patchData, 199 final RandomAccessFile oldData, 200 final OutputStream newData, 201 final byte[] buffer1, 202 final byte[] buffer2) 203 throws IOException { 204 int numBytesLeft = diffLength; 205 while (numBytesLeft > 0) { 206 final int numBytesThisRound = Math.min(numBytesLeft, buffer1.length); 207 oldData.readFully(buffer1, 0, numBytesThisRound); 208 readFully(patchData, buffer2, 0, numBytesThisRound); 209 for (int i = 0; i < numBytesThisRound; i++) { 210 buffer1[i] += buffer2[i]; 211 } 212 newData.write(buffer1, 0, numBytesThisRound); 213 numBytesLeft -= numBytesThisRound; 214 } 215 } 216 217 /** 218 * Reads a long value in little-endian, signed-magnitude format (the format used by the C++ 219 * bsdiff implementation). 220 * 221 * @param in the stream to read from 222 * @return the long value 223 * @throws PatchFormatException if the value is negative zero (unsupported) 224 * @throws IOException if unable to read all 8 bytes from the stream 225 */ 226 // Visible for testing only 227 static final long readBsdiffLong(InputStream in) throws PatchFormatException, IOException { 228 long result = 0; 229 for (int bitshift = 0; bitshift < 64; bitshift += 8) { 230 result |= ((long) in.read()) << bitshift; 231 } 232 233 if (result == NEGATIVE_LONG_SIGN_MASK) { 234 // "Negative zero", which is valid in signed-magnitude format. 235 // NB: No sane patch generator should ever produce such a value. 236 throw new PatchFormatException("read negative zero"); 237 } 238 239 if ((result & NEGATIVE_LONG_SIGN_MASK) != 0) { 240 result = -(result & ~NEGATIVE_LONG_SIGN_MASK); 241 } 242 243 return result; 244 } 245 246 /** 247 * Read exactly the specified number of bytes into the specified buffer. 248 * 249 * @param in the input stream to read from 250 * @param destination where to write the bytes to 251 * @param startAt the offset at which to start writing bytes in the destination buffer 252 * @param numBytes the number of bytes to read 253 * @throws IOException if reading from the stream fails 254 */ 255 // Visible for testing only 256 static void readFully( 257 final InputStream in, final byte[] destination, final int startAt, final int numBytes) 258 throws IOException { 259 int numRead = 0; 260 while (numRead < numBytes) { 261 int readNow = in.read(destination, startAt + numRead, numBytes - numRead); 262 if (readNow == -1) { 263 throw new IOException("truncated input stream"); 264 } 265 numRead += readNow; 266 } 267 } 268 269 /** 270 * Use an intermediate buffer to pipe bytes from an InputStream directly to an OutputStream. The 271 * buffer's contents may be destroyed by this operation. 272 * 273 * @param in the stream to read bytes from. 274 * @param out the stream to write bytes to. 275 * @param buffer the buffer to use for copying bytes; must have length > 0 276 * @param copyLength the number of bytes to copy from the input stream to the output stream 277 */ 278 // Visible for testing only 279 static void pipe( 280 final InputStream in, final OutputStream out, final byte[] buffer, int copyLength) 281 throws IOException { 282 while (copyLength > 0) { 283 int maxCopy = Math.min(buffer.length, copyLength); 284 readFully(in, buffer, 0, maxCopy); 285 out.write(buffer, 0, maxCopy); 286 copyLength -= maxCopy; 287 } 288 } 289 } 290