Home | History | Annotate | Download | only in hash
      1 /*
      2  * Copyright (C) 2011 The Guava Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
      5  * in compliance with the License. You may obtain a copy of the License at
      6  *
      7  * http://www.apache.org/licenses/LICENSE-2.0
      8  *
      9  * Unless required by applicable law or agreed to in writing, software distributed under the License
     10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
     11  * or implied. See the License for the specific language governing permissions and limitations under
     12  * the License.
     13  */
     14 
     15 package com.google.common.hash;
     16 
     17 import static com.google.common.base.Preconditions.checkArgument;
     18 
     19 import com.google.common.base.Preconditions;
     20 
     21 import java.nio.ByteBuffer;
     22 import java.nio.ByteOrder;
     23 import java.nio.charset.Charset;
     24 
     25 /**
     26  * Skeleton implementation of {@link HashFunction}. Provides default implementations which
     27  * invokes the appropriate method on {@link #newHasher()}, then return the result of
     28  * {@link Hasher#hash}.
     29  *
     30  * <p>Invocations of {@link #newHasher(int)} also delegate to {@linkplain #newHasher()}, ignoring
     31  * the expected input size parameter.
     32  *
     33  * @author Kevin Bourrillion
     34  */
     35 abstract class AbstractStreamingHashFunction implements HashFunction {
     36   @Override public <T> HashCode hashObject(T instance, Funnel<? super T> funnel) {
     37     return newHasher().putObject(instance, funnel).hash();
     38   }
     39 
     40   @Override public HashCode hashUnencodedChars(CharSequence input) {
     41     return newHasher().putUnencodedChars(input).hash();
     42   }
     43 
     44   @Override public HashCode hashString(CharSequence input, Charset charset) {
     45     return newHasher().putString(input, charset).hash();
     46   }
     47 
     48   @Override public HashCode hashInt(int input) {
     49     return newHasher().putInt(input).hash();
     50   }
     51 
     52   @Override public HashCode hashLong(long input) {
     53     return newHasher().putLong(input).hash();
     54   }
     55 
     56   @Override public HashCode hashBytes(byte[] input) {
     57     return newHasher().putBytes(input).hash();
     58   }
     59 
     60   @Override public HashCode hashBytes(byte[] input, int off, int len) {
     61     return newHasher().putBytes(input, off, len).hash();
     62   }
     63 
     64   @Override public Hasher newHasher(int expectedInputSize) {
     65     Preconditions.checkArgument(expectedInputSize >= 0);
     66     return newHasher();
     67   }
     68 
     69   /**
     70    * A convenience base class for implementors of {@code Hasher}; handles accumulating data
     71    * until an entire "chunk" (of implementation-dependent length) is ready to be hashed.
     72    *
     73    * @author Kevin Bourrillion
     74    * @author Dimitris Andreou
     75    */
     76   // TODO(kevinb): this class still needs some design-and-document-for-inheritance love
     77   protected static abstract class AbstractStreamingHasher extends AbstractHasher {
     78     /** Buffer via which we pass data to the hash algorithm (the implementor) */
     79     private final ByteBuffer buffer;
     80 
     81     /** Number of bytes to be filled before process() invocation(s). */
     82     private final int bufferSize;
     83 
     84     /** Number of bytes processed per process() invocation. */
     85     private final int chunkSize;
     86 
     87     /**
     88      * Constructor for use by subclasses. This hasher instance will process chunks of the specified
     89      * size.
     90      *
     91      * @param chunkSize the number of bytes available per {@link #process(ByteBuffer)} invocation;
     92      *        must be at least 4
     93      */
     94     protected AbstractStreamingHasher(int chunkSize) {
     95       this(chunkSize, chunkSize);
     96     }
     97 
     98     /**
     99      * Constructor for use by subclasses. This hasher instance will process chunks of the specified
    100      * size, using an internal buffer of {@code bufferSize} size, which must be a multiple of
    101      * {@code chunkSize}.
    102      *
    103      * @param chunkSize the number of bytes available per {@link #process(ByteBuffer)} invocation;
    104      *        must be at least 4
    105      * @param bufferSize the size of the internal buffer. Must be a multiple of chunkSize
    106      */
    107     protected AbstractStreamingHasher(int chunkSize, int bufferSize) {
    108       // TODO(kevinb): check more preconditions (as bufferSize >= chunkSize) if this is ever public
    109       checkArgument(bufferSize % chunkSize == 0);
    110 
    111       // TODO(user): benchmark performance difference with longer buffer
    112       this.buffer = ByteBuffer
    113           .allocate(bufferSize + 7) // always space for a single primitive
    114           .order(ByteOrder.LITTLE_ENDIAN);
    115       this.bufferSize = bufferSize;
    116       this.chunkSize = chunkSize;
    117     }
    118 
    119     /**
    120      * Processes the available bytes of the buffer (at most {@code chunk} bytes).
    121      */
    122     protected abstract void process(ByteBuffer bb);
    123 
    124     /**
    125      * This is invoked for the last bytes of the input, which are not enough to
    126      * fill a whole chunk. The passed {@code ByteBuffer} is guaranteed to be
    127      * non-empty.
    128      *
    129      * <p>This implementation simply pads with zeros and delegates to
    130      * {@link #process(ByteBuffer)}.
    131      */
    132     protected void processRemaining(ByteBuffer bb) {
    133       bb.position(bb.limit()); // move at the end
    134       bb.limit(chunkSize + 7); // get ready to pad with longs
    135       while (bb.position() < chunkSize) {
    136         bb.putLong(0);
    137       }
    138       bb.limit(chunkSize);
    139       bb.flip();
    140       process(bb);
    141     }
    142 
    143     @Override
    144     public final Hasher putBytes(byte[] bytes) {
    145       return putBytes(bytes, 0, bytes.length);
    146     }
    147 
    148     @Override
    149     public final Hasher putBytes(byte[] bytes, int off, int len) {
    150       return putBytes(ByteBuffer.wrap(bytes, off, len).order(ByteOrder.LITTLE_ENDIAN));
    151     }
    152 
    153     private Hasher putBytes(ByteBuffer readBuffer) {
    154       // If we have room for all of it, this is easy
    155       if (readBuffer.remaining() <= buffer.remaining()) {
    156         buffer.put(readBuffer);
    157         munchIfFull();
    158         return this;
    159       }
    160 
    161       // First add just enough to fill buffer size, and munch that
    162       int bytesToCopy = bufferSize - buffer.position();
    163       for (int i = 0; i < bytesToCopy; i++) {
    164         buffer.put(readBuffer.get());
    165       }
    166       munch(); // buffer becomes empty here, since chunkSize divides bufferSize
    167 
    168       // Now process directly from the rest of the input buffer
    169       while (readBuffer.remaining() >= chunkSize) {
    170         process(readBuffer);
    171       }
    172 
    173       // Finally stick the remainder back in our usual buffer
    174       buffer.put(readBuffer);
    175       return this;
    176     }
    177 
    178     @Override
    179     public final Hasher putUnencodedChars(CharSequence charSequence) {
    180       for (int i = 0; i < charSequence.length(); i++) {
    181         putChar(charSequence.charAt(i));
    182       }
    183       return this;
    184     }
    185 
    186     @Override
    187     public final Hasher putByte(byte b) {
    188       buffer.put(b);
    189       munchIfFull();
    190       return this;
    191     }
    192 
    193     @Override
    194     public final Hasher putShort(short s) {
    195       buffer.putShort(s);
    196       munchIfFull();
    197       return this;
    198     }
    199 
    200     @Override
    201     public final Hasher putChar(char c) {
    202       buffer.putChar(c);
    203       munchIfFull();
    204       return this;
    205     }
    206 
    207     @Override
    208     public final Hasher putInt(int i) {
    209       buffer.putInt(i);
    210       munchIfFull();
    211       return this;
    212     }
    213 
    214     @Override
    215     public final Hasher putLong(long l) {
    216       buffer.putLong(l);
    217       munchIfFull();
    218       return this;
    219     }
    220 
    221     @Override
    222     public final <T> Hasher putObject(T instance, Funnel<? super T> funnel) {
    223       funnel.funnel(instance, this);
    224       return this;
    225     }
    226 
    227     @Override
    228     public final HashCode hash() {
    229       munch();
    230       buffer.flip();
    231       if (buffer.remaining() > 0) {
    232         processRemaining(buffer);
    233       }
    234       return makeHash();
    235     }
    236 
    237     abstract HashCode makeHash();
    238 
    239     // Process pent-up data in chunks
    240     private void munchIfFull() {
    241       if (buffer.remaining() < 8) {
    242         // buffer is full; not enough room for a primitive. We have at least one full chunk.
    243         munch();
    244       }
    245     }
    246 
    247     private void munch() {
    248       buffer.flip();
    249       while (buffer.remaining() >= chunkSize) {
    250         // we could limit the buffer to ensure process() does not read more than
    251         // chunkSize number of bytes, but we trust the implementations
    252         process(buffer);
    253       }
    254       buffer.compact(); // preserve any remaining data that do not make a full chunk
    255     }
    256   }
    257 }
    258