Home | History | Annotate | Download | only in hash
      1 /*
      2  * Copyright (C) 2011 The Guava Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
      5  * in compliance with the License. You may obtain a copy of the License at
      6  *
      7  * http://www.apache.org/licenses/LICENSE-2.0
      8  *
      9  * Unless required by applicable law or agreed to in writing, software distributed under the License
     10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
     11  * or implied. See the License for the specific language governing permissions and limitations under
     12  * the License.
     13  */
     14 
     15 package com.google.common.hash;
     16 
     17 import static com.google.common.base.Preconditions.checkArgument;
     18 import static com.google.common.base.Preconditions.checkNotNull;
     19 import static com.google.common.base.Preconditions.checkState;
     20 
     21 import com.google.common.annotations.Beta;
     22 import com.google.common.base.Preconditions;
     23 import com.google.common.primitives.Ints;
     24 import com.google.common.primitives.UnsignedInts;
     25 
     26 import java.io.Serializable;
     27 import java.security.MessageDigest;
     28 
     29 import javax.annotation.Nullable;
     30 
     31 /**
     32  * An immutable hash code of arbitrary bit length.
     33  *
     34  * @author Dimitris Andreou
     35  * @author Kurt Alfred Kluever
     36  * @since 11.0
     37  */
     38 @Beta
     39 public abstract class HashCode {
     40   HashCode() {}
     41 
     42   /**
     43    * Returns the number of bits in this hash code; a positive multiple of 8.
     44    */
     45   public abstract int bits();
     46 
     47   /**
     48    * Returns the first four bytes of {@linkplain #asBytes() this hashcode's bytes}, converted to
     49    * an {@code int} value in little-endian order.
     50    *
     51    * @throws IllegalStateException if {@code bits() < 32}
     52    */
     53   public abstract int asInt();
     54 
     55   /**
     56    * Returns the first eight bytes of {@linkplain #asBytes() this hashcode's bytes}, converted to
     57    * a {@code long} value in little-endian order.
     58    *
     59    * @throws IllegalStateException if {@code bits() < 64}
     60    */
     61   public abstract long asLong();
     62 
     63   /**
     64    * If this hashcode has enough bits, returns {@code asLong()}, otherwise returns a {@code long}
     65    * value with {@code asBytes()} as the least-significant bytes and {@code 0x00} as the remaining
     66    * most-significant bytes.
     67    *
     68    * @since 14.0 (since 11.0 as {@code Hashing.padToLong(HashCode)})
     69    */
     70   public abstract long padToLong();
     71 
     72   /**
     73    * Returns the value of this hash code as a byte array. The caller may modify the byte array;
     74    * changes to it will <i>not</i> be reflected in this {@code HashCode} object or any other arrays
     75    * returned by this method.
     76    */
     77   // TODO(user): consider ByteString here, when that is available
     78   public abstract byte[] asBytes();
     79 
     80   /**
     81    * Copies bytes from this hash code into {@code dest}.
     82    *
     83    * @param dest the byte array into which the hash code will be written
     84    * @param offset the start offset in the data
     85    * @param maxLength the maximum number of bytes to write
     86    * @return the number of bytes written to {@code dest}
     87    * @throws IndexOutOfBoundsException if there is not enough room in {@code dest}
     88    */
     89   public int writeBytesTo(byte[] dest, int offset, int maxLength) {
     90     maxLength = Ints.min(maxLength, bits() / 8);
     91     Preconditions.checkPositionIndexes(offset, offset + maxLength, dest.length);
     92     writeBytesToImpl(dest, offset, maxLength);
     93     return maxLength;
     94   }
     95 
     96   abstract void writeBytesToImpl(byte[] dest, int offset, int maxLength);
     97 
     98   /**
     99    * Returns a mutable view of the underlying bytes for the given {@code HashCode} if it is a
    100    * byte-based hashcode. Otherwise it returns {@link HashCode#asBytes}. Do <i>not</i> mutate this
    101    * array or else you will break the immutability contract of {@code HashCode}.
    102    */
    103   byte[] getBytesInternal() {
    104     return asBytes();
    105   }
    106 
    107   /**
    108    * Creates a 32-bit {@code HashCode} representation of the given int value. The underlying bytes
    109    * are interpreted in little endian order.
    110    *
    111    * @since 15.0 (since 12.0 in HashCodes)
    112    */
    113   public static HashCode fromInt(int hash) {
    114     return new IntHashCode(hash);
    115   }
    116 
    117   private static final class IntHashCode extends HashCode implements Serializable {
    118     final int hash;
    119 
    120     IntHashCode(int hash) {
    121       this.hash = hash;
    122     }
    123 
    124     @Override
    125     public int bits() {
    126       return 32;
    127     }
    128 
    129     @Override
    130     public byte[] asBytes() {
    131       return new byte[] {
    132           (byte) hash,
    133           (byte) (hash >> 8),
    134           (byte) (hash >> 16),
    135           (byte) (hash >> 24)};
    136     }
    137 
    138     @Override
    139     public int asInt() {
    140       return hash;
    141     }
    142 
    143     @Override
    144     public long asLong() {
    145       throw new IllegalStateException("this HashCode only has 32 bits; cannot create a long");
    146     }
    147 
    148     @Override
    149     public long padToLong() {
    150       return UnsignedInts.toLong(hash);
    151     }
    152 
    153     @Override
    154     void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
    155       for (int i = 0; i < maxLength; i++) {
    156         dest[offset + i] = (byte) (hash >> (i * 8));
    157       }
    158     }
    159 
    160     private static final long serialVersionUID = 0;
    161   }
    162 
    163   /**
    164    * Creates a 64-bit {@code HashCode} representation of the given long value. The underlying bytes
    165    * are interpreted in little endian order.
    166    *
    167    * @since 15.0 (since 12.0 in HashCodes)
    168    */
    169   public static HashCode fromLong(long hash) {
    170     return new LongHashCode(hash);
    171   }
    172 
    173   private static final class LongHashCode extends HashCode implements Serializable {
    174     final long hash;
    175 
    176     LongHashCode(long hash) {
    177       this.hash = hash;
    178     }
    179 
    180     @Override
    181     public int bits() {
    182       return 64;
    183     }
    184 
    185     @Override
    186     public byte[] asBytes() {
    187       return new byte[] {
    188           (byte) hash,
    189           (byte) (hash >> 8),
    190           (byte) (hash >> 16),
    191           (byte) (hash >> 24),
    192           (byte) (hash >> 32),
    193           (byte) (hash >> 40),
    194           (byte) (hash >> 48),
    195           (byte) (hash >> 56)};
    196     }
    197 
    198     @Override
    199     public int asInt() {
    200       return (int) hash;
    201     }
    202 
    203     @Override
    204     public long asLong() {
    205       return hash;
    206     }
    207 
    208     @Override
    209     public long padToLong() {
    210       return hash;
    211     }
    212 
    213     @Override
    214     void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
    215       for (int i = 0; i < maxLength; i++) {
    216         dest[offset + i] = (byte) (hash >> (i * 8));
    217       }
    218     }
    219 
    220     private static final long serialVersionUID = 0;
    221   }
    222 
    223   /**
    224    * Creates a {@code HashCode} from a byte array. The array is defensively copied to preserve
    225    * the immutability contract of {@code HashCode}. The array cannot be empty.
    226    *
    227    * @since 15.0 (since 12.0 in HashCodes)
    228    */
    229   public static HashCode fromBytes(byte[] bytes) {
    230     checkArgument(bytes.length >= 1, "A HashCode must contain at least 1 byte.");
    231     return fromBytesNoCopy(bytes.clone());
    232   }
    233 
    234   /**
    235    * Creates a {@code HashCode} from a byte array. The array is <i>not</i> copied defensively,
    236    * so it must be handed-off so as to preserve the immutability contract of {@code HashCode}.
    237    */
    238   static HashCode fromBytesNoCopy(byte[] bytes) {
    239     return new BytesHashCode(bytes);
    240   }
    241 
    242   private static final class BytesHashCode extends HashCode implements Serializable {
    243     final byte[] bytes;
    244 
    245     BytesHashCode(byte[] bytes) {
    246       this.bytes = checkNotNull(bytes);
    247     }
    248 
    249     @Override
    250     public int bits() {
    251       return bytes.length * 8;
    252     }
    253 
    254     @Override
    255     public byte[] asBytes() {
    256       return bytes.clone();
    257     }
    258 
    259     @Override
    260     public int asInt() {
    261       checkState(bytes.length >= 4,
    262           "HashCode#asInt() requires >= 4 bytes (it only has %s bytes).", bytes.length);
    263       return (bytes[0] & 0xFF)
    264           | ((bytes[1] & 0xFF) << 8)
    265           | ((bytes[2] & 0xFF) << 16)
    266           | ((bytes[3] & 0xFF) << 24);
    267     }
    268 
    269     @Override
    270     public long asLong() {
    271       checkState(bytes.length >= 8,
    272           "HashCode#asLong() requires >= 8 bytes (it only has %s bytes).", bytes.length);
    273       return padToLong();
    274     }
    275 
    276     @Override
    277     public long padToLong() {
    278       long retVal = (bytes[0] & 0xFF);
    279       for (int i = 1; i < Math.min(bytes.length, 8); i++) {
    280         retVal |= (bytes[i] & 0xFFL) << (i * 8);
    281       }
    282       return retVal;
    283     }
    284 
    285     @Override
    286     void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
    287       System.arraycopy(bytes, 0, dest, offset, maxLength);
    288     }
    289 
    290     @Override
    291     byte[] getBytesInternal() {
    292       return bytes;
    293     }
    294 
    295     private static final long serialVersionUID = 0;
    296   }
    297 
    298   /**
    299    * Creates a {@code HashCode} from a hexadecimal ({@code base 16}) encoded string. The string must
    300    * be at least 2 characters long, and contain only valid, lower-cased hexadecimal characters.
    301    *
    302    * <p>This method accepts the exact format generated by {@link #toString}. If you require more
    303    * lenient {@code base 16} decoding, please use
    304    * {@link com.google.common.io.BaseEncoding#decode} (and pass the result to {@link #fromBytes}).
    305    *
    306    * @since 15.0
    307    */
    308   public static HashCode fromString(String string) {
    309     checkArgument(string.length() >= 2,
    310         "input string (%s) must have at least 2 characters", string);
    311     checkArgument(string.length() % 2 == 0,
    312         "input string (%s) must have an even number of characters", string);
    313 
    314     byte[] bytes = new byte[string.length() / 2];
    315     for (int i = 0; i < string.length(); i += 2) {
    316       int ch1 = decode(string.charAt(i)) << 4;
    317       int ch2 = decode(string.charAt(i + 1));
    318       bytes[i / 2] = (byte) (ch1 + ch2);
    319     }
    320     return fromBytesNoCopy(bytes);
    321   }
    322 
    323   private static int decode(char ch) {
    324     if (ch >= '0' && ch <= '9') {
    325       return ch - '0';
    326     }
    327     if (ch >= 'a' && ch <= 'f') {
    328       return ch - 'a' + 10;
    329     }
    330     throw new IllegalArgumentException("Illegal hexadecimal character: " + ch);
    331   }
    332 
    333   @Override
    334   public final boolean equals(@Nullable Object object) {
    335     if (object instanceof HashCode) {
    336       HashCode that = (HashCode) object;
    337       // Undocumented: this is a non-short-circuiting equals(), in case this is a cryptographic
    338       // hash code, in which case we don't want to leak timing information
    339       return MessageDigest.isEqual(this.asBytes(), that.asBytes());
    340     }
    341     return false;
    342   }
    343 
    344   /**
    345    * Returns a "Java hash code" for this {@code HashCode} instance; this is well-defined
    346    * (so, for example, you can safely put {@code HashCode} instances into a {@code
    347    * HashSet}) but is otherwise probably not what you want to use.
    348    */
    349   @Override
    350   public final int hashCode() {
    351     // If we have at least 4 bytes (32 bits), just take the first 4 bytes. Since this is
    352     // already a (presumably) high-quality hash code, any four bytes of it will do.
    353     if (bits() >= 32) {
    354       return asInt();
    355     }
    356     // If we have less than 4 bytes, use them all.
    357     byte[] bytes = asBytes();
    358     int val = (bytes[0] & 0xFF);
    359     for (int i = 1; i < bytes.length; i++) {
    360       val |= ((bytes[i] & 0xFF) << (i * 8));
    361     }
    362     return val;
    363   }
    364 
    365   /**
    366    * Returns a string containing each byte of {@link #asBytes}, in order, as a two-digit unsigned
    367    * hexadecimal number in lower case.
    368    *
    369    * <p>Note that if the output is considered to be a single hexadecimal number, this hash code's
    370    * bytes are the <i>big-endian</i> representation of that number. This may be surprising since
    371    * everything else in the hashing API uniformly treats multibyte values as little-endian. But
    372    * this format conveniently matches that of utilities such as the UNIX {@code md5sum} command.
    373    *
    374    * <p>To create a {@code HashCode} from its string representation, see {@link #fromString}.
    375    */
    376   @Override
    377   public final String toString() {
    378     byte[] bytes = asBytes();
    379     StringBuilder sb = new StringBuilder(2 * bytes.length);
    380     for (byte b : bytes) {
    381       sb.append(hexDigits[(b >> 4) & 0xf]).append(hexDigits[b & 0xf]);
    382     }
    383     return sb.toString();
    384   }
    385 
    386   private static final char[] hexDigits = "0123456789abcdef".toCharArray();
    387 }
    388