Home | History | Annotate | Download | only in cache
      1 // Copyright 2017 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package cache
      6 
      7 import (
      8 	"bytes"
      9 	"crypto/sha256"
     10 	"fmt"
     11 	"hash"
     12 	"io"
     13 	"os"
     14 	"runtime"
     15 	"sync"
     16 )
     17 
     18 var debugHash = false // set when GODEBUG=gocachehash=1
     19 
     20 // HashSize is the number of bytes in a hash.
     21 const HashSize = 32
     22 
     23 // A Hash provides access to the canonical hash function used to index the cache.
     24 // The current implementation uses salted SHA256, but clients must not assume this.
     25 type Hash struct {
     26 	h    hash.Hash
     27 	name string        // for debugging
     28 	buf  *bytes.Buffer // for verify
     29 }
     30 
     31 // hashSalt is a salt string added to the beginning of every hash
     32 // created by NewHash. Using the Go version makes sure that different
     33 // versions of the go command (or even different Git commits during
     34 // work on the development branch) do not address the same cache
     35 // entries, so that a bug in one version does not affect the execution
     36 // of other versions. This salt will result in additional ActionID files
     37 // in the cache, but not additional copies of the large output files,
     38 // which are still addressed by unsalted SHA256.
     39 var hashSalt = []byte(runtime.Version())
     40 
     41 // Subkey returns an action ID corresponding to mixing a parent
     42 // action ID with a string description of the subkey.
     43 func Subkey(parent ActionID, desc string) ActionID {
     44 	h := sha256.New()
     45 	h.Write([]byte("subkey:"))
     46 	h.Write(parent[:])
     47 	h.Write([]byte(desc))
     48 	var out ActionID
     49 	h.Sum(out[:0])
     50 	if debugHash {
     51 		fmt.Fprintf(os.Stderr, "HASH subkey %x %q = %x\n", parent, desc, out)
     52 	}
     53 	if verify {
     54 		hashDebug.Lock()
     55 		hashDebug.m[out] = fmt.Sprintf("subkey %x %q", parent, desc)
     56 		hashDebug.Unlock()
     57 	}
     58 	return out
     59 }
     60 
     61 // NewHash returns a new Hash.
     62 // The caller is expected to Write data to it and then call Sum.
     63 func NewHash(name string) *Hash {
     64 	h := &Hash{h: sha256.New(), name: name}
     65 	if debugHash {
     66 		fmt.Fprintf(os.Stderr, "HASH[%s]\n", h.name)
     67 	}
     68 	h.Write(hashSalt)
     69 	if verify {
     70 		h.buf = new(bytes.Buffer)
     71 	}
     72 	return h
     73 }
     74 
     75 // Write writes data to the running hash.
     76 func (h *Hash) Write(b []byte) (int, error) {
     77 	if debugHash {
     78 		fmt.Fprintf(os.Stderr, "HASH[%s]: %q\n", h.name, b)
     79 	}
     80 	if h.buf != nil {
     81 		h.buf.Write(b)
     82 	}
     83 	return h.h.Write(b)
     84 }
     85 
     86 // Sum returns the hash of the data written previously.
     87 func (h *Hash) Sum() [HashSize]byte {
     88 	var out [HashSize]byte
     89 	h.h.Sum(out[:0])
     90 	if debugHash {
     91 		fmt.Fprintf(os.Stderr, "HASH[%s]: %x\n", h.name, out)
     92 	}
     93 	if h.buf != nil {
     94 		hashDebug.Lock()
     95 		if hashDebug.m == nil {
     96 			hashDebug.m = make(map[[HashSize]byte]string)
     97 		}
     98 		hashDebug.m[out] = h.buf.String()
     99 		hashDebug.Unlock()
    100 	}
    101 	return out
    102 }
    103 
    104 // In GODEBUG=gocacheverify=1 mode,
    105 // hashDebug holds the input to every computed hash ID,
    106 // so that we can work backward from the ID involved in a
    107 // cache entry mismatch to a description of what should be there.
    108 var hashDebug struct {
    109 	sync.Mutex
    110 	m map[[HashSize]byte]string
    111 }
    112 
    113 // reverseHash returns the input used to compute the hash id.
    114 func reverseHash(id [HashSize]byte) string {
    115 	hashDebug.Lock()
    116 	s := hashDebug.m[id]
    117 	hashDebug.Unlock()
    118 	return s
    119 }
    120 
    121 var hashFileCache struct {
    122 	sync.Mutex
    123 	m map[string][HashSize]byte
    124 }
    125 
    126 // HashFile returns the hash of the named file.
    127 // It caches repeated lookups for a given file,
    128 // and the cache entry for a file can be initialized
    129 // using SetFileHash.
    130 // The hash used by FileHash is not the same as
    131 // the hash used by NewHash.
    132 func FileHash(file string) ([HashSize]byte, error) {
    133 	hashFileCache.Lock()
    134 	out, ok := hashFileCache.m[file]
    135 	hashFileCache.Unlock()
    136 
    137 	if ok {
    138 		return out, nil
    139 	}
    140 
    141 	h := sha256.New()
    142 	f, err := os.Open(file)
    143 	if err != nil {
    144 		if debugHash {
    145 			fmt.Fprintf(os.Stderr, "HASH %s: %v\n", file, err)
    146 		}
    147 		return [HashSize]byte{}, err
    148 	}
    149 	_, err = io.Copy(h, f)
    150 	f.Close()
    151 	if err != nil {
    152 		if debugHash {
    153 			fmt.Fprintf(os.Stderr, "HASH %s: %v\n", file, err)
    154 		}
    155 		return [HashSize]byte{}, err
    156 	}
    157 	h.Sum(out[:0])
    158 	if debugHash {
    159 		fmt.Fprintf(os.Stderr, "HASH %s: %x\n", file, out)
    160 	}
    161 
    162 	SetFileHash(file, out)
    163 	return out, nil
    164 }
    165 
    166 // SetFileHash sets the hash returned by FileHash for file.
    167 func SetFileHash(file string, sum [HashSize]byte) {
    168 	hashFileCache.Lock()
    169 	if hashFileCache.m == nil {
    170 		hashFileCache.m = make(map[string][HashSize]byte)
    171 	}
    172 	hashFileCache.m[file] = sum
    173 	hashFileCache.Unlock()
    174 }
    175