Home | History | Annotate | Download | only in buildid
      1 // Copyright 2017 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package buildid
      6 
      7 import (
      8 	"bytes"
      9 	"crypto/sha256"
     10 	"fmt"
     11 	"io"
     12 )
     13 
     14 // FindAndHash reads all of r and returns the offsets of occurrences of id.
     15 // While reading, findAndHash also computes and returns
     16 // a hash of the content of r, but with occurrences of id replaced by zeros.
     17 // FindAndHash reads bufSize bytes from r at a time.
     18 // If bufSize == 0, FindAndHash uses a reasonable default.
     19 func FindAndHash(r io.Reader, id string, bufSize int) (matches []int64, hash [32]byte, err error) {
     20 	if bufSize == 0 {
     21 		bufSize = 31 * 1024 // bufSize+little will likely fit in 32 kB
     22 	}
     23 	if len(id) > bufSize {
     24 		return nil, [32]byte{}, fmt.Errorf("buildid.FindAndHash: buffer too small")
     25 	}
     26 	zeros := make([]byte, len(id))
     27 	idBytes := []byte(id)
     28 
     29 	// The strategy is to read the file through buf, looking for id,
     30 	// but we need to worry about what happens if id is broken up
     31 	// and returned in parts by two different reads.
     32 	// We allocate a tiny buffer (at least len(id)) and a big buffer (bufSize bytes)
     33 	// next to each other in memory and then copy the tail of
     34 	// one read into the tiny buffer before reading new data into the big buffer.
     35 	// The search for id is over the entire tiny+big buffer.
     36 	tiny := (len(id) + 127) &^ 127 // round up to 128-aligned
     37 	buf := make([]byte, tiny+bufSize)
     38 	h := sha256.New()
     39 	start := tiny
     40 	for offset := int64(0); ; {
     41 		// The file offset maintained by the loop corresponds to &buf[tiny].
     42 		// buf[start:tiny] is left over from previous iteration.
     43 		// After reading n bytes into buf[tiny:], we process buf[start:tiny+n].
     44 		n, err := io.ReadFull(r, buf[tiny:])
     45 		if err != io.ErrUnexpectedEOF && err != io.EOF && err != nil {
     46 			return nil, [32]byte{}, err
     47 		}
     48 
     49 		// Process any matches.
     50 		for {
     51 			i := bytes.Index(buf[start:tiny+n], idBytes)
     52 			if i < 0 {
     53 				break
     54 			}
     55 			matches = append(matches, offset+int64(start+i-tiny))
     56 			h.Write(buf[start : start+i])
     57 			h.Write(zeros)
     58 			start += i + len(id)
     59 		}
     60 		if n < bufSize {
     61 			// Did not fill buffer, must be at end of file.
     62 			h.Write(buf[start : tiny+n])
     63 			break
     64 		}
     65 
     66 		// Process all but final tiny bytes of buf (bufSize = len(buf)-tiny).
     67 		// Note that start > len(buf)-tiny is possible, if the search above
     68 		// found an id ending in the final tiny fringe. That's OK.
     69 		if start < len(buf)-tiny {
     70 			h.Write(buf[start : len(buf)-tiny])
     71 			start = len(buf) - tiny
     72 		}
     73 
     74 		// Slide ending tiny-sized fringe to beginning of buffer.
     75 		copy(buf[0:], buf[bufSize:])
     76 		start -= bufSize
     77 		offset += int64(bufSize)
     78 	}
     79 	h.Sum(hash[:0])
     80 	return matches, hash, nil
     81 }
     82 
     83 func Rewrite(w io.WriterAt, pos []int64, id string) error {
     84 	b := []byte(id)
     85 	for _, p := range pos {
     86 		if _, err := w.WriteAt(b, p); err != nil {
     87 			return err
     88 		}
     89 	}
     90 	return nil
     91 }
     92