Home | History | Annotate | Download | only in fileslist
      1 // Copyright 2017 Google Inc. All rights reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // A replacement for fileslist.py, written in Go, that makes use of multiple CPU cores.
     16 
     17 package main
     18 
     19 import (
     20 	"crypto/sha256"
     21 	"encoding/json"
     22 	"flag"
     23 	"fmt"
     24 	"io"
     25 	"os"
     26 	"path/filepath"
     27 	"runtime"
     28 	"sort"
     29 	"strings"
     30 	"sync"
     31 )
     32 
     33 const (
     34 	MAX_DEFAULT_PARA = 24
     35 )
     36 
     37 func defaultPara() int {
     38 	ret := runtime.NumCPU()
     39 	if ret > MAX_DEFAULT_PARA {
     40 		return MAX_DEFAULT_PARA
     41 	}
     42 	return ret
     43 }
     44 
     45 var (
     46 	para = flag.Int("para", defaultPara(), "Number of goroutines")
     47 )
     48 
     49 // Represents each file.
     50 type Node struct {
     51 	SHA256 string
     52 	Name   string // device side path.
     53 	Size   int64
     54 	path   string // host side path.
     55 	stat   os.FileInfo
     56 }
     57 
     58 func newNode(hostPath string, devicePath string, stat os.FileInfo) Node {
     59 	return Node{Name: devicePath, path: hostPath, stat: stat}
     60 }
     61 
     62 // Scan a Node and returns true if it should be added to the result.
     63 func (n *Node) scan() bool {
     64 	n.Size = n.stat.Size()
     65 
     66 	// Calculate SHA256.
     67 	h := sha256.New()
     68 	if n.stat.Mode()&os.ModeSymlink == 0 {
     69 		f, err := os.Open(n.path)
     70 		if err != nil {
     71 			panic(err)
     72 		}
     73 		defer f.Close()
     74 
     75 		if _, err := io.Copy(h, f); err != nil {
     76 			panic(err)
     77 		}
     78 	} else {
     79 		// Hash the content of symlink, not the file it points to.
     80 		s, err := os.Readlink(n.path)
     81 		if err != nil {
     82 			panic(err)
     83 		}
     84 		if _, err := io.WriteString(h, s); err != nil {
     85 			panic(err)
     86 		}
     87 	}
     88 	n.SHA256 = fmt.Sprintf("%x", h.Sum(nil))
     89 	return true
     90 }
     91 
     92 func main() {
     93 	flag.Parse()
     94 
     95 	allOutput := make([]Node, 0, 1024) // Store all outputs.
     96 	mutex := &sync.Mutex{}             // Guard allOutput
     97 
     98 	ch := make(chan Node) // Pass nodes to goroutines.
     99 
    100 	var wg sync.WaitGroup // To wait for all goroutines.
    101 	wg.Add(*para)
    102 
    103 	// Scan files in multiple goroutines.
    104 	for i := 0; i < *para; i++ {
    105 		go func() {
    106 			defer wg.Done()
    107 
    108 			output := make([]Node, 0, 1024) // Local output list.
    109 			for node := range ch {
    110 				if node.scan() {
    111 					output = append(output, node)
    112 				}
    113 			}
    114 			// Add to the global output list.
    115 			mutex.Lock()
    116 			allOutput = append(allOutput, output...)
    117 			mutex.Unlock()
    118 		}()
    119 	}
    120 
    121 	// Walk the directories and find files to scan.
    122 	for _, dir := range flag.Args() {
    123 		absDir, err := filepath.Abs(dir)
    124 		if err != nil {
    125 			panic(err)
    126 		}
    127 		deviceRoot := filepath.Clean(absDir + "/..")
    128 		err = filepath.Walk(dir, func(path string, stat os.FileInfo, err error) error {
    129 			if err != nil {
    130 				panic(err)
    131 			}
    132 			if stat.IsDir() {
    133 				return nil
    134 			}
    135 			absPath, err := filepath.Abs(path)
    136 			if err != nil {
    137 				panic(err)
    138 			}
    139 			devicePath, err := filepath.Rel(deviceRoot, absPath)
    140 			if err != nil {
    141 				panic(err)
    142 			}
    143 			devicePath = "/" + devicePath
    144 			ch <- newNode(absPath, devicePath, stat)
    145 			return nil
    146 		})
    147 		if err != nil {
    148 			panic(err)
    149 		}
    150 	}
    151 
    152 	// Wait until all the goroutines finish.
    153 	close(ch)
    154 	wg.Wait()
    155 
    156 	// Sort the entries and dump as json.
    157 	sort.Slice(allOutput, func(i, j int) bool {
    158 		if allOutput[i].Size > allOutput[j].Size {
    159 			return true
    160 		}
    161 		if allOutput[i].Size == allOutput[j].Size && strings.Compare(allOutput[i].Name, allOutput[j].Name) > 0 {
    162 			return true
    163 		}
    164 		return false
    165 	})
    166 
    167 	j, err := json.MarshalIndent(allOutput, "", "  ")
    168 	if err != nil {
    169 		panic(nil)
    170 	}
    171 
    172 	fmt.Printf("%s\n", j)
    173 }
    174