Home | History | Annotate | Download | only in hash
      1 /*
      2  * Copyright (C) 2011 The Guava Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
      5  * in compliance with the License. You may obtain a copy of the License at
      6  *
      7  * http://www.apache.org/licenses/LICENSE-2.0
      8  *
      9  * Unless required by applicable law or agreed to in writing, software distributed under the License
     10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
     11  * or implied. See the License for the specific language governing permissions and limitations under
     12  * the License.
     13  */
     14 
     15 package com.google.common.hash;
     16 
     17 import static com.google.common.base.Preconditions.checkArgument;
     18 import static com.google.common.base.Preconditions.checkNotNull;
     19 import static com.google.common.base.Preconditions.checkState;
     20 
     21 import com.google.common.annotations.Beta;
     22 import com.google.common.base.Preconditions;
     23 import com.google.common.primitives.Ints;
     24 import com.google.common.primitives.UnsignedInts;
     25 
     26 import java.io.Serializable;
     27 import java.security.MessageDigest;
     28 
     29 import javax.annotation.Nullable;
     30 
     31 /**
     32  * An immutable hash code of arbitrary bit length.
     33  *
     34  * @author Dimitris Andreou
     35  * @author Kurt Alfred Kluever
     36  * @since 11.0
     37  */
     38 @Beta
     39 public abstract class HashCode {
     40   HashCode() {}
     41 
     42   /**
     43    * Returns the number of bits in this hash code; a positive multiple of 8.
     44    */
     45   public abstract int bits();
     46 
     47   /**
     48    * Returns the first four bytes of {@linkplain #asBytes() this hashcode's bytes}, converted to
     49    * an {@code int} value in little-endian order.
     50    *
     51    * @throws IllegalStateException if {@code bits() < 32}
     52    */
     53   public abstract int asInt();
     54 
     55   /**
     56    * Returns the first eight bytes of {@linkplain #asBytes() this hashcode's bytes}, converted to
     57    * a {@code long} value in little-endian order.
     58    *
     59    * @throws IllegalStateException if {@code bits() < 64}
     60    */
     61   public abstract long asLong();
     62 
     63   /**
     64    * If this hashcode has enough bits, returns {@code asLong()}, otherwise returns a {@code long}
     65    * value with {@code asBytes()} as the least-significant bytes and {@code 0x00} as the remaining
     66    * most-significant bytes.
     67    *
     68    * @since 14.0 (since 11.0 as {@code Hashing.padToLong(HashCode)})
     69    */
     70   public abstract long padToLong();
     71 
     72   /**
     73    * Returns the value of this hash code as a byte array. The caller may modify the byte array;
     74    * changes to it will <i>not</i> be reflected in this {@code HashCode} object or any other arrays
     75    * returned by this method.
     76    */
     77   // TODO(user): consider ByteString here, when that is available
     78   public abstract byte[] asBytes();
     79 
     80   /**
     81    * Copies bytes from this hash code into {@code dest}.
     82    *
     83    * @param dest the byte array into which the hash code will be written
     84    * @param offset the start offset in the data
     85    * @param maxLength the maximum number of bytes to write
     86    * @return the number of bytes written to {@code dest}
     87    * @throws IndexOutOfBoundsException if there is not enough room in {@code dest}
     88    */
     89   public int writeBytesTo(byte[] dest, int offset, int maxLength) {
     90     maxLength = Ints.min(maxLength, bits() / 8);
     91     Preconditions.checkPositionIndexes(offset, offset + maxLength, dest.length);
     92     writeBytesToImpl(dest, offset, maxLength);
     93     return maxLength;
     94   }
     95 
     96   abstract void writeBytesToImpl(byte[] dest, int offset, int maxLength);
     97 
     98   /**
     99    * Returns a mutable view of the underlying bytes for the given {@code HashCode} if it is a
    100    * byte-based hashcode. Otherwise it returns {@link HashCode#asBytes}. Do <i>not</i> mutate this
    101    * array or else you will break the immutability contract of {@code HashCode}.
    102    */
    103   byte[] getBytesInternal() {
    104     return asBytes();
    105   }
    106 
    107   /**
    108    * Returns whether this {@code HashCode} and that {@code HashCode} have the same value, given that
    109    * they have the same number of bits.
    110    */
    111   abstract boolean equalsSameBits(HashCode that);
    112 
    113   /**
    114    * Creates a 32-bit {@code HashCode} representation of the given int value. The underlying bytes
    115    * are interpreted in little endian order.
    116    *
    117    * @since 15.0 (since 12.0 in HashCodes)
    118    */
    119   public static HashCode fromInt(int hash) {
    120     return new IntHashCode(hash);
    121   }
    122 
    123   private static final class IntHashCode extends HashCode implements Serializable {
    124     final int hash;
    125 
    126     IntHashCode(int hash) {
    127       this.hash = hash;
    128     }
    129 
    130     @Override
    131     public int bits() {
    132       return 32;
    133     }
    134 
    135     @Override
    136     public byte[] asBytes() {
    137       return new byte[] {
    138           (byte) hash,
    139           (byte) (hash >> 8),
    140           (byte) (hash >> 16),
    141           (byte) (hash >> 24)};
    142     }
    143 
    144     @Override
    145     public int asInt() {
    146       return hash;
    147     }
    148 
    149     @Override
    150     public long asLong() {
    151       throw new IllegalStateException("this HashCode only has 32 bits; cannot create a long");
    152     }
    153 
    154     @Override
    155     public long padToLong() {
    156       return UnsignedInts.toLong(hash);
    157     }
    158 
    159     @Override
    160     void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
    161       for (int i = 0; i < maxLength; i++) {
    162         dest[offset + i] = (byte) (hash >> (i * 8));
    163       }
    164     }
    165 
    166     @Override boolean equalsSameBits(HashCode that) {
    167       return hash == that.asInt();
    168     }
    169 
    170     private static final long serialVersionUID = 0;
    171   }
    172 
    173   /**
    174    * Creates a 64-bit {@code HashCode} representation of the given long value. The underlying bytes
    175    * are interpreted in little endian order.
    176    *
    177    * @since 15.0 (since 12.0 in HashCodes)
    178    */
    179   public static HashCode fromLong(long hash) {
    180     return new LongHashCode(hash);
    181   }
    182 
    183   private static final class LongHashCode extends HashCode implements Serializable {
    184     final long hash;
    185 
    186     LongHashCode(long hash) {
    187       this.hash = hash;
    188     }
    189 
    190     @Override
    191     public int bits() {
    192       return 64;
    193     }
    194 
    195     @Override
    196     public byte[] asBytes() {
    197       return new byte[] {
    198           (byte) hash,
    199           (byte) (hash >> 8),
    200           (byte) (hash >> 16),
    201           (byte) (hash >> 24),
    202           (byte) (hash >> 32),
    203           (byte) (hash >> 40),
    204           (byte) (hash >> 48),
    205           (byte) (hash >> 56)};
    206     }
    207 
    208     @Override
    209     public int asInt() {
    210       return (int) hash;
    211     }
    212 
    213     @Override
    214     public long asLong() {
    215       return hash;
    216     }
    217 
    218     @Override
    219     public long padToLong() {
    220       return hash;
    221     }
    222 
    223     @Override
    224     void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
    225       for (int i = 0; i < maxLength; i++) {
    226         dest[offset + i] = (byte) (hash >> (i * 8));
    227       }
    228     }
    229 
    230     @Override
    231     boolean equalsSameBits(HashCode that) {
    232       return hash == that.asLong();
    233     }
    234 
    235     private static final long serialVersionUID = 0;
    236   }
    237 
    238   /**
    239    * Creates a {@code HashCode} from a byte array. The array is defensively copied to preserve
    240    * the immutability contract of {@code HashCode}. The array cannot be empty.
    241    *
    242    * @since 15.0 (since 12.0 in HashCodes)
    243    */
    244   public static HashCode fromBytes(byte[] bytes) {
    245     checkArgument(bytes.length >= 1, "A HashCode must contain at least 1 byte.");
    246     return fromBytesNoCopy(bytes.clone());
    247   }
    248 
    249   /**
    250    * Creates a {@code HashCode} from a byte array. The array is <i>not</i> copied defensively,
    251    * so it must be handed-off so as to preserve the immutability contract of {@code HashCode}.
    252    */
    253   static HashCode fromBytesNoCopy(byte[] bytes) {
    254     return new BytesHashCode(bytes);
    255   }
    256 
    257   private static final class BytesHashCode extends HashCode implements Serializable {
    258     final byte[] bytes;
    259 
    260     BytesHashCode(byte[] bytes) {
    261       this.bytes = checkNotNull(bytes);
    262     }
    263 
    264     @Override
    265     public int bits() {
    266       return bytes.length * 8;
    267     }
    268 
    269     @Override
    270     public byte[] asBytes() {
    271       return bytes.clone();
    272     }
    273 
    274     @Override
    275     public int asInt() {
    276       checkState(bytes.length >= 4,
    277           "HashCode#asInt() requires >= 4 bytes (it only has %s bytes).", bytes.length);
    278       return (bytes[0] & 0xFF)
    279           | ((bytes[1] & 0xFF) << 8)
    280           | ((bytes[2] & 0xFF) << 16)
    281           | ((bytes[3] & 0xFF) << 24);
    282     }
    283 
    284     @Override
    285     public long asLong() {
    286       checkState(bytes.length >= 8,
    287           "HashCode#asLong() requires >= 8 bytes (it only has %s bytes).", bytes.length);
    288       return padToLong();
    289     }
    290 
    291     @Override
    292     public long padToLong() {
    293       long retVal = (bytes[0] & 0xFF);
    294       for (int i = 1; i < Math.min(bytes.length, 8); i++) {
    295         retVal |= (bytes[i] & 0xFFL) << (i * 8);
    296       }
    297       return retVal;
    298     }
    299 
    300     @Override
    301     void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
    302       System.arraycopy(bytes, 0, dest, offset, maxLength);
    303     }
    304 
    305     @Override
    306     byte[] getBytesInternal() {
    307       return bytes;
    308     }
    309 
    310     @Override
    311     boolean equalsSameBits(HashCode that) {
    312       return MessageDigest.isEqual(bytes, that.getBytesInternal());
    313     }
    314 
    315     private static final long serialVersionUID = 0;
    316   }
    317 
    318   /**
    319    * Creates a {@code HashCode} from a hexadecimal ({@code base 16}) encoded string. The string must
    320    * be at least 2 characters long, and contain only valid, lower-cased hexadecimal characters.
    321    *
    322    * <p>This method accepts the exact format generated by {@link #toString}. If you require more
    323    * lenient {@code base 16} decoding, please use
    324    * {@link com.google.common.io.BaseEncoding#decode} (and pass the result to {@link #fromBytes}).
    325    *
    326    * @since 15.0
    327    */
    328   public static HashCode fromString(String string) {
    329     checkArgument(string.length() >= 2,
    330         "input string (%s) must have at least 2 characters", string);
    331     checkArgument(string.length() % 2 == 0,
    332         "input string (%s) must have an even number of characters", string);
    333 
    334     byte[] bytes = new byte[string.length() / 2];
    335     for (int i = 0; i < string.length(); i += 2) {
    336       int ch1 = decode(string.charAt(i)) << 4;
    337       int ch2 = decode(string.charAt(i + 1));
    338       bytes[i / 2] = (byte) (ch1 + ch2);
    339     }
    340     return fromBytesNoCopy(bytes);
    341   }
    342 
    343   private static int decode(char ch) {
    344     if (ch >= '0' && ch <= '9') {
    345       return ch - '0';
    346     }
    347     if (ch >= 'a' && ch <= 'f') {
    348       return ch - 'a' + 10;
    349     }
    350     throw new IllegalArgumentException("Illegal hexadecimal character: " + ch);
    351   }
    352 
    353   @Override
    354   public final boolean equals(@Nullable Object object) {
    355     if (object instanceof HashCode) {
    356       HashCode that = (HashCode) object;
    357       return bits() == that.bits() && equalsSameBits(that);
    358     }
    359     return false;
    360   }
    361 
    362   /**
    363    * Returns a "Java hash code" for this {@code HashCode} instance; this is well-defined
    364    * (so, for example, you can safely put {@code HashCode} instances into a {@code
    365    * HashSet}) but is otherwise probably not what you want to use.
    366    */
    367   @Override
    368   public final int hashCode() {
    369     // If we have at least 4 bytes (32 bits), just take the first 4 bytes. Since this is
    370     // already a (presumably) high-quality hash code, any four bytes of it will do.
    371     if (bits() >= 32) {
    372       return asInt();
    373     }
    374     // If we have less than 4 bytes, use them all.
    375     byte[] bytes = asBytes();
    376     int val = (bytes[0] & 0xFF);
    377     for (int i = 1; i < bytes.length; i++) {
    378       val |= ((bytes[i] & 0xFF) << (i * 8));
    379     }
    380     return val;
    381   }
    382 
    383   /**
    384    * Returns a string containing each byte of {@link #asBytes}, in order, as a two-digit unsigned
    385    * hexadecimal number in lower case.
    386    *
    387    * <p>Note that if the output is considered to be a single hexadecimal number, this hash code's
    388    * bytes are the <i>big-endian</i> representation of that number. This may be surprising since
    389    * everything else in the hashing API uniformly treats multibyte values as little-endian. But
    390    * this format conveniently matches that of utilities such as the UNIX {@code md5sum} command.
    391    *
    392    * <p>To create a {@code HashCode} from its string representation, see {@link #fromString}.
    393    */
    394   @Override
    395   public final String toString() {
    396     byte[] bytes = asBytes();
    397     StringBuilder sb = new StringBuilder(2 * bytes.length);
    398     for (byte b : bytes) {
    399       sb.append(hexDigits[(b >> 4) & 0xf]).append(hexDigits[b & 0xf]);
    400     }
    401     return sb.toString();
    402   }
    403 
    404   private static final char[] hexDigits = "0123456789abcdef".toCharArray();
    405 }
    406