Home | History | Annotate | Download | only in hash
      1 /*
      2  * Copyright (C) 2011 The Guava Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.common.hash;
     18 
     19 import static org.junit.Assert.assertEquals;
     20 
     21 import com.google.common.base.Charsets;
     22 import com.google.common.collect.ImmutableSet;
     23 import com.google.common.collect.Sets;
     24 import com.google.common.jdk5backport.Arrays;
     25 import com.google.common.primitives.Ints;
     26 import com.google.common.testing.EqualsTester;
     27 
     28 import org.junit.Assert;
     29 
     30 import java.nio.charset.Charset;
     31 import java.util.Random;
     32 import java.util.Set;
     33 
     34 /**
     35  * Various utilities for testing {@link HashFunction}s.
     36  *
     37  * @author Dimitris Andreou
     38  * @author Kurt Alfred Kluever
     39  */
     40 final class HashTestUtils {
     41   private HashTestUtils() {}
     42 
     43   /**
     44    * Converts a string, which should contain only ascii-representable characters, to a byte[].
     45    */
     46   static byte[] ascii(String string) {
     47     byte[] bytes = new byte[string.length()];
     48     for (int i = 0; i < string.length(); i++) {
     49       bytes[i] = (byte) string.charAt(i);
     50     }
     51     return bytes;
     52   }
     53 
     54   interface HashFn {
     55     byte[] hash(byte[] input, int seed);
     56   }
     57 
     58   static void verifyHashFunction(HashFn hashFunction, int hashbits, int expected) {
     59     int hashBytes = hashbits / 8;
     60 
     61     byte[] key = new byte[256];
     62     byte[] hashes = new byte[hashBytes * 256];
     63 
     64     // Hash keys of the form {}, {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as the seed
     65     for (int i = 0; i < 256; i++) {
     66       key[i] = (byte) i;
     67       int seed = 256 - i;
     68       byte[] hash = hashFunction.hash(Arrays.copyOf(key, i), seed);
     69       System.arraycopy(hash, 0, hashes, i * hashBytes, hash.length);
     70     }
     71 
     72     // Then hash the result array
     73     byte[] result = hashFunction.hash(hashes, 0);
     74 
     75     // interpreted in little-endian order.
     76     int verification = Integer.reverseBytes(Ints.fromByteArray(result));
     77 
     78     if (expected != verification) {
     79       throw new AssertionError("Expected: " + Integer.toHexString(expected)
     80           + " got: " + Integer.toHexString(verification));
     81     }
     82   }
     83 
     84   static final Funnel<Object> BAD_FUNNEL = new Funnel<Object>() {
     85     @Override public void funnel(Object object, PrimitiveSink bytePrimitiveSink) {
     86       bytePrimitiveSink.putInt(object.hashCode());
     87     }
     88   };
     89 
     90   static enum RandomHasherAction {
     91     PUT_BOOLEAN() {
     92       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
     93         boolean value = random.nextBoolean();
     94         for (PrimitiveSink sink : sinks) {
     95           sink.putBoolean(value);
     96         }
     97       }
     98     },
     99     PUT_BYTE() {
    100       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
    101         int value = random.nextInt();
    102         for (PrimitiveSink sink : sinks) {
    103           sink.putByte((byte) value);
    104         }
    105       }
    106     },
    107     PUT_SHORT() {
    108       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
    109         short value = (short) random.nextInt();
    110         for (PrimitiveSink sink : sinks) {
    111           sink.putShort(value);
    112         }
    113       }
    114     },
    115     PUT_CHAR() {
    116       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
    117         char value = (char) random.nextInt();
    118         for (PrimitiveSink sink : sinks) {
    119           sink.putChar(value);
    120         }
    121       }
    122     },
    123     PUT_INT() {
    124       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
    125         int value = random.nextInt();
    126         for (PrimitiveSink sink : sinks) {
    127           sink.putInt(value);
    128         }
    129       }
    130     },
    131     PUT_LONG() {
    132       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
    133         long value = random.nextLong();
    134         for (PrimitiveSink sink : sinks) {
    135           sink.putLong(value);
    136         }
    137       }
    138     },
    139     PUT_FLOAT() {
    140       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
    141         float value = random.nextFloat();
    142         for (PrimitiveSink sink : sinks) {
    143           sink.putFloat(value);
    144         }
    145       }
    146     },
    147     PUT_DOUBLE() {
    148       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
    149         double value = random.nextDouble();
    150         for (PrimitiveSink sink : sinks) {
    151           sink.putDouble(value);
    152         }
    153       }
    154     },
    155     PUT_BYTES() {
    156       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
    157         byte[] value = new byte[random.nextInt(128)];
    158         random.nextBytes(value);
    159         for (PrimitiveSink sink : sinks) {
    160           sink.putBytes(value);
    161         }
    162       }
    163     },
    164     PUT_BYTES_INT_INT() {
    165       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
    166         byte[] value = new byte[random.nextInt(128)];
    167         random.nextBytes(value);
    168         int off = random.nextInt(value.length + 1);
    169         int len = random.nextInt(value.length - off + 1);
    170         for (PrimitiveSink sink : sinks) {
    171           sink.putBytes(value);
    172         }
    173       }
    174     },
    175     PUT_STRING() {
    176       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
    177         char[] value = new char[random.nextInt(128)];
    178         for (int i = 0; i < value.length; i++) {
    179           value[i] = (char) random.nextInt();
    180         }
    181         String s = new String(value);
    182         for (PrimitiveSink sink : sinks) {
    183           sink.putUnencodedChars(s);
    184         }
    185       }
    186     },
    187     PUT_STRING_LOW_SURROGATE() {
    188       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
    189         String s = new String(new char[] { randomLowSurrogate(random) });
    190         for (PrimitiveSink sink : sinks) {
    191           sink.putUnencodedChars(s);
    192         }
    193       }
    194     },
    195     PUT_STRING_HIGH_SURROGATE() {
    196       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
    197         String s = new String(new char[] { randomHighSurrogate(random) });
    198         for (PrimitiveSink sink : sinks) {
    199           sink.putUnencodedChars(s);
    200         }
    201       }
    202     },
    203     PUT_STRING_LOW_HIGH_SURROGATE() {
    204       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
    205         String s = new String(new char[] {
    206             randomLowSurrogate(random), randomHighSurrogate(random)});
    207         for (PrimitiveSink sink : sinks) {
    208           sink.putUnencodedChars(s);
    209         }
    210       }
    211     },
    212     PUT_STRING_HIGH_LOW_SURROGATE() {
    213       @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
    214         String s = new String(new char[] {
    215             randomHighSurrogate(random), randomLowSurrogate(random)});
    216         for (PrimitiveSink sink : sinks) {
    217           sink.putUnencodedChars(s);
    218         }
    219       }
    220     };
    221 
    222     abstract void performAction(Random random, Iterable<? extends PrimitiveSink> sinks);
    223 
    224     private static final RandomHasherAction[] actions = values();
    225 
    226     static RandomHasherAction pickAtRandom(Random random) {
    227       return actions[random.nextInt(actions.length)];
    228     }
    229   }
    230 
    231   /**
    232    * Test that the hash function contains no funnels. A funnel is a situation where a set of input
    233    * (key) bits 'affects' a strictly smaller set of output bits. Funneling is bad because it can
    234    * result in more-than-ideal collisions for a non-uniformly distributed key space. In practice,
    235    * most key spaces are ANYTHING BUT uniformly distributed. A bit(i) in the input is said to
    236    * 'affect' a bit(j) in the output if two inputs, identical but for bit(i), will differ at output
    237    * bit(j) about half the time
    238    *
    239    * <p>Funneling is pretty simple to detect. The key idea is to find example keys which
    240    * unequivocably demonstrate that funneling cannot be occuring. This is done bit-by-bit. For
    241    * each input bit(i) and output bit(j), two pairs of keys must be found with all bits identical
    242    * except bit(i). One pair must differ in output bit(j), and one pair must not. This proves that
    243    * input bit(i) can alter output bit(j).
    244    */
    245   static void checkNoFunnels(HashFunction function) {
    246     Random rand = new Random(0);
    247     int keyBits = 32;
    248     int hashBits = function.bits();
    249 
    250     // output loop tests input bit
    251     for (int i = 0; i < keyBits; i++) {
    252       int same = 0x0; // bitset for output bits with same values
    253       int diff = 0x0; // bitset for output bits with different values
    254       int count = 0;
    255       // originally was 2 * Math.log(...), making it try more times to avoid flakiness issues
    256       int maxCount = (int) (4 * Math.log(2 * keyBits * hashBits) + 1);
    257       while (same != 0xffffffff || diff != 0xffffffff) {
    258         int key1 = rand.nextInt();
    259         // flip input bit for key2
    260         int key2 = key1 ^ (1 << i);
    261         // get hashes
    262         int hash1 = function.hashInt(key1).asInt();
    263         int hash2 = function.hashInt(key2).asInt();
    264         // test whether the hash values have same output bits
    265         same |= ~(hash1 ^ hash2);
    266         // test whether the hash values have different output bits
    267         diff |= (hash1 ^ hash2);
    268 
    269         count++;
    270         // check whether we've exceeded the probabilistically
    271         // likely number of trials to have proven no funneling
    272         if (count > maxCount) {
    273           Assert.fail("input bit(" + i + ") was found not to affect all " +
    274                hashBits + " output bits; The unaffected bits are " +
    275                "as follows: " + ~(same & diff) + ". This was " +
    276                "determined after " + count + " trials.");
    277         }
    278       }
    279     }
    280   }
    281 
    282   /**
    283    * Test for avalanche. Avalanche means that output bits differ with roughly 1/2 probability on
    284    * different input keys. This test verifies that each possible 1-bit key delta achieves avalanche.
    285    *
    286    * <p>For more information: http://burtleburtle.net/bob/hash/avalanche.html
    287    */
    288   static void checkAvalanche(HashFunction function, int trials, double epsilon) {
    289     Random rand = new Random(0);
    290     int keyBits = 32;
    291     int hashBits = function.bits();
    292     for (int i = 0; i < keyBits; i++) {
    293       int[] same = new int[hashBits];
    294       int[] diff = new int[hashBits];
    295       // go through trials to compute probability
    296       for (int j = 0; j < trials; j++) {
    297         int key1 = rand.nextInt();
    298         // flip input bit for key2
    299         int key2 = key1 ^ (1 << i);
    300         // compute hash values
    301         int hash1 = function.hashInt(key1).asInt();
    302         int hash2 = function.hashInt(key2).asInt();
    303         for (int k = 0; k < hashBits; k++) {
    304           if ((hash1 & (1 << k)) == (hash2 & (1 << k))) {
    305             same[k] += 1;
    306           } else {
    307             diff[k] += 1;
    308           }
    309         }
    310       }
    311       // measure probability and assert it's within margin of error
    312       for (int j = 0; j < hashBits; j++) {
    313         double prob = (double) diff[j] / (double) (diff[j] + same[j]);
    314         Assert.assertEquals(0.50d, prob, epsilon);
    315       }
    316     }
    317   }
    318 
    319   /**
    320    * Test for 2-bit characteristics. A characteristic is a delta in the input which is repeated in
    321    * the output. For example, if f() is a block cipher and c is a characteristic, then
    322    * f(x^c) = f(x)^c with greater than expected probability. The test for funneling is merely a test
    323    * for 1-bit characteristics.
    324    *
    325    * <p>There is more general code provided by Bob Jenkins to test arbitrarily sized characteristics
    326    * using the magic of gaussian elimination: http://burtleburtle.net/bob/crypto/findingc.html.
    327    */
    328   static void checkNo2BitCharacteristics(HashFunction function) {
    329     Random rand = new Random(0);
    330     int keyBits = 32;
    331 
    332     // get every one of (keyBits choose 2) deltas:
    333     for (int i = 0; i < keyBits; i++) {
    334       for (int j = 0; j < keyBits; j++) {
    335         if (j <= i) continue;
    336         int count = 0;
    337         int maxCount = 20; // the probability of error here is miniscule
    338         boolean diff = false;
    339 
    340         while (!diff) {
    341           int delta = (1 << i) | (1 << j);
    342           int key1 = rand.nextInt();
    343           // apply delta
    344           int key2 = key1 ^ delta;
    345 
    346           // get hashes
    347           int hash1 = function.hashInt(key1).asInt();
    348           int hash2 = function.hashInt(key2).asInt();
    349 
    350           // this 2-bit candidate delta is not a characteristic
    351           // if deltas are different
    352           if ((hash1 ^ hash2) != delta) {
    353             diff = true;
    354             continue;
    355           }
    356 
    357           // check if we've exceeded the probabilistically
    358           // likely number of trials to have proven 2-bit candidate
    359           // is not a characteristic
    360           count++;
    361           if (count > maxCount) {
    362             Assert.fail("2-bit delta (" + i + ", " + j + ") is likely a " +
    363                  "characteristic for this hash. This was " +
    364                  "determined after " + count + " trials");
    365           }
    366         }
    367       }
    368     }
    369   }
    370 
    371   /**
    372    * Test for avalanche with 2-bit deltas. Most probabilities of output bit(j) differing are well
    373    * within 50%.
    374    */
    375   static void check2BitAvalanche(HashFunction function, int trials, double epsilon) {
    376     Random rand = new Random(0);
    377     int keyBits = 32;
    378     int hashBits = function.bits();
    379     for (int bit1 = 0; bit1 < keyBits; bit1++) {
    380       for (int bit2 = 0; bit2 < keyBits; bit2++) {
    381         if (bit2 <= bit1) continue;
    382         int delta = (1 << bit1) | (1 << bit2);
    383         int[] same = new int[hashBits];
    384         int[] diff = new int[hashBits];
    385         // go through trials to compute probability
    386         for (int j = 0; j < trials; j++) {
    387           int key1 = rand.nextInt();
    388           // flip input bit for key2
    389           int key2 = key1 ^ delta;
    390           // compute hash values
    391           int hash1 = function.hashInt(key1).asInt();
    392           int hash2 = function.hashInt(key2).asInt();
    393           for (int k = 0; k < hashBits; k++) {
    394             if ((hash1 & (1 << k)) == (hash2 & (1 << k))) {
    395               same[k] += 1;
    396             } else {
    397               diff[k] += 1;
    398             }
    399           }
    400         }
    401         // measure probability and assert it's within margin of error
    402         for (int j = 0; j < hashBits; j++) {
    403           double prob = (double) diff[j] / (double) (diff[j] + same[j]);
    404           Assert.assertEquals(0.50d, prob, epsilon);
    405         }
    406       }
    407     }
    408   }
    409 
    410   /**
    411    * Checks that a Hasher returns the same HashCode when given the same input, and also
    412    * that the collision rate looks sane.
    413    */
    414   static void assertInvariants(HashFunction hashFunction) {
    415     int objects = 100;
    416     Set<HashCode> hashcodes = Sets.newHashSetWithExpectedSize(objects);
    417     for (int i = 0; i < objects; i++) {
    418       Object o = new Object();
    419       HashCode hashcode1 = hashFunction.hashObject(o, HashTestUtils.BAD_FUNNEL);
    420       HashCode hashcode2 = hashFunction.hashObject(o, HashTestUtils.BAD_FUNNEL);
    421       Assert.assertEquals(hashcode1, hashcode2); // idempotent
    422       Assert.assertEquals(hashFunction.bits(), hashcode1.bits());
    423       Assert.assertEquals(hashFunction.bits(), hashcode1.asBytes().length * 8);
    424       hashcodes.add(hashcode1);
    425     }
    426     Assert.assertTrue(hashcodes.size() > objects * 0.95); // quite relaxed test
    427 
    428     assertHashBytesThrowsCorrectExceptions(hashFunction);
    429     assertIndependentHashers(hashFunction);
    430     assertShortcutsAreEquivalent(hashFunction, 512);
    431   }
    432 
    433   static void assertHashBytesThrowsCorrectExceptions(HashFunction hashFunction) {
    434     hashFunction.hashBytes(new byte[64], 0, 0);
    435 
    436     try {
    437       hashFunction.hashBytes(new byte[128], -1, 128);
    438       Assert.fail();
    439     } catch (IndexOutOfBoundsException expected) {}
    440     try {
    441       hashFunction.hashBytes(new byte[128], 64, 256 /* too long len */);
    442       Assert.fail();
    443     } catch (IndexOutOfBoundsException expected) {}
    444     try {
    445       hashFunction.hashBytes(new byte[64], 0, -1);
    446       Assert.fail();
    447     } catch (IndexOutOfBoundsException expected) {}
    448   }
    449 
    450   static void assertIndependentHashers(HashFunction hashFunction) {
    451     int numActions = 100;
    452     // hashcodes from non-overlapping hash computations
    453     HashCode expected1 = randomHash(hashFunction, new Random(1L), numActions);
    454     HashCode expected2 = randomHash(hashFunction, new Random(2L), numActions);
    455 
    456     // equivalent, but overlapping, computations (should produce the same results as above)
    457     Random random1 = new Random(1L);
    458     Random random2 = new Random(2L);
    459     Hasher hasher1 = hashFunction.newHasher();
    460     Hasher hasher2 = hashFunction.newHasher();
    461     for (int i = 0; i < numActions; i++) {
    462       RandomHasherAction.pickAtRandom(random1).performAction(random1, ImmutableSet.of(hasher1));
    463       RandomHasherAction.pickAtRandom(random2).performAction(random2, ImmutableSet.of(hasher2));
    464     }
    465 
    466     Assert.assertEquals(expected1, hasher1.hash());
    467     Assert.assertEquals(expected2, hasher2.hash());
    468   }
    469 
    470   static HashCode randomHash(HashFunction hashFunction, Random random, int numActions) {
    471     Hasher hasher = hashFunction.newHasher();
    472     for (int i = 0; i < numActions; i++) {
    473       RandomHasherAction.pickAtRandom(random).performAction(random, ImmutableSet.of(hasher));
    474     }
    475     return hasher.hash();
    476   }
    477 
    478   private static void assertShortcutsAreEquivalent(HashFunction hashFunction, int trials) {
    479     Random random = new Random(42085L);
    480     for (int i = 0; i < trials; i++) {
    481       assertHashBytesEquivalence(hashFunction, random);
    482       assertHashIntEquivalence(hashFunction, random);
    483       assertHashLongEquivalence(hashFunction, random);
    484       assertHashStringEquivalence(hashFunction, random);
    485       assertHashStringWithSurrogatesEquivalence(hashFunction, random);
    486     }
    487   }
    488 
    489   private static void assertHashBytesEquivalence(HashFunction hashFunction, Random random) {
    490     int size = random.nextInt(2048);
    491     byte[] bytes = new byte[size];
    492     random.nextBytes(bytes);
    493     assertEquals(hashFunction.hashBytes(bytes),
    494         hashFunction.newHasher(size).putBytes(bytes).hash());
    495     int off = random.nextInt(size);
    496     int len = random.nextInt(size - off);
    497     assertEquals(hashFunction.hashBytes(bytes, off, len),
    498         hashFunction.newHasher(size).putBytes(bytes, off, len).hash());
    499   }
    500 
    501   private static void assertHashIntEquivalence(HashFunction hashFunction, Random random) {
    502     int i = random.nextInt();
    503     assertEquals(hashFunction.hashInt(i),
    504         hashFunction.newHasher().putInt(i).hash());
    505   }
    506 
    507   private static void assertHashLongEquivalence(HashFunction hashFunction, Random random) {
    508     long l = random.nextLong();
    509     assertEquals(hashFunction.hashLong(l),
    510         hashFunction.newHasher().putLong(l).hash());
    511   }
    512 
    513   private static final ImmutableSet<Charset> CHARSETS = ImmutableSet.of(
    514       Charsets.ISO_8859_1,
    515       Charsets.US_ASCII,
    516       Charsets.UTF_16,
    517       Charsets.UTF_16BE,
    518       Charsets.UTF_16LE,
    519       Charsets.UTF_8);
    520 
    521   private static void assertHashStringEquivalence(HashFunction hashFunction, Random random) {
    522     // Test that only data and data-order is important, not the individual operations.
    523     new EqualsTester()
    524         .addEqualityGroup(
    525             hashFunction.hashUnencodedChars("abc"),
    526             hashFunction.newHasher().putUnencodedChars("abc").hash(),
    527             hashFunction.newHasher().putUnencodedChars("ab").putUnencodedChars("c").hash(),
    528             hashFunction.newHasher().putUnencodedChars("a").putUnencodedChars("bc").hash(),
    529             hashFunction.newHasher().putUnencodedChars("a").putUnencodedChars("b")
    530                 .putUnencodedChars("c").hash(),
    531             hashFunction.newHasher().putChar('a').putUnencodedChars("bc").hash(),
    532             hashFunction.newHasher().putUnencodedChars("ab").putChar('c').hash(),
    533             hashFunction.newHasher().putChar('a').putChar('b').putChar('c').hash())
    534         .testEquals();
    535 
    536     int size = random.nextInt(2048);
    537     byte[] bytes = new byte[size];
    538     random.nextBytes(bytes);
    539     String string = new String(bytes);
    540     assertEquals(hashFunction.hashUnencodedChars(string),
    541         hashFunction.newHasher().putUnencodedChars(string).hash());
    542     for (Charset charset : CHARSETS) {
    543       assertEquals(hashFunction.hashString(string, charset),
    544           hashFunction.newHasher().putString(string, charset).hash());
    545     }
    546   }
    547 
    548   /**
    549    * This verifies that putUnencodedChars(String) and hashUnencodedChars(String) are equivalent,
    550    * even for funny strings composed by (possibly unmatched, and mostly illegal) surrogate
    551    * characters. (But doesn't test that they do the right thing - just their consistency).
    552    */
    553   private static void assertHashStringWithSurrogatesEquivalence(
    554       HashFunction hashFunction, Random random) {
    555     int size = random.nextInt(8) + 1;
    556     char[] chars = new char[size];
    557     for (int i = 0; i < chars.length; i++) {
    558       chars[i] = random.nextBoolean() ? randomLowSurrogate(random) : randomHighSurrogate(random);
    559     }
    560     String string = new String(chars);
    561     assertEquals(hashFunction.hashUnencodedChars(string),
    562         hashFunction.newHasher().putUnencodedChars(string).hash());
    563   }
    564 
    565   static char randomLowSurrogate(Random random) {
    566     return (char) (Character.MIN_LOW_SURROGATE
    567         + random.nextInt(Character.MAX_LOW_SURROGATE - Character.MIN_LOW_SURROGATE + 1));
    568   }
    569 
    570   static char randomHighSurrogate(Random random) {
    571     return (char) (Character.MIN_HIGH_SURROGATE
    572         + random.nextInt(Character.MAX_HIGH_SURROGATE - Character.MIN_HIGH_SURROGATE + 1));
    573   }
    574 }
    575