Home | History | Annotate | Download | only in soong_zip
      1 // Copyright 2015 Google Inc. All rights reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 package main
     16 
     17 import (
     18 	"bytes"
     19 	"compress/flate"
     20 	"flag"
     21 	"fmt"
     22 	"hash/crc32"
     23 	"io"
     24 	"io/ioutil"
     25 	"log"
     26 	"os"
     27 	"path/filepath"
     28 	"runtime"
     29 	"runtime/pprof"
     30 	"runtime/trace"
     31 	"strings"
     32 	"sync"
     33 	"time"
     34 
     35 	"android/soong/third_party/zip"
     36 )
     37 
// parallelBlockSize is the size of each block when a single file is
// compressed in parallel.
const parallelBlockSize = 1 * 1024 * 1024 // 1MB

// minParallelFileSize is the minimum file size at which parallel compression
// is used. Parallel compression requires more flate.Writer allocations, since
// the dictionary of a flate.Writer cannot be changed during Reset.
const minParallelFileSize = parallelBlockSize * 6

// windowSize is the size of the ZIP compression window (32KB). It is also the
// amount of preceding data handed to each parallel block as its dictionary.
const windowSize = 32 * 1024
     48 
     49 type nopCloser struct {
     50 	io.Writer
     51 }
     52 
     53 func (nopCloser) Close() error {
     54 	return nil
     55 }
     56 
// fileArg records one -f or -l command-line argument together with the -P and
// -C values that were in effect when it was parsed.
type fileArg struct {
	// pathPrefixInZip is prepended to each destination path inside the zip;
	// sourcePrefixToStrip is the directory that source paths are made
	// relative to.
	pathPrefixInZip, sourcePrefixToStrip string
	// sourceFiles lists the source paths: a single path for -f, or the lines
	// of the list file for -l.
	sourceFiles                          []string
}
     61 
// pathMapping pairs a source path on disk with its destination path inside
// the zip and the zip method requested for it.
type pathMapping struct {
	// dest is the entry name in the zip; src is the file to read.
	dest, src string
	// zipMethod is zip.Store for paths listed with -s, zip.Deflate otherwise.
	zipMethod uint16
}
     66 
     67 type uniqueSet map[string]bool
     68 
     69 func (u *uniqueSet) String() string {
     70 	return `""`
     71 }
     72 
     73 func (u *uniqueSet) Set(s string) error {
     74 	if _, found := (*u)[s]; found {
     75 		return fmt.Errorf("File %q was specified twice as a file to not deflate", s)
     76 	} else {
     77 		(*u)[s] = true
     78 	}
     79 
     80 	return nil
     81 }
     82 
// fileArgs is the list of parsed -f and -l arguments.
type fileArgs []fileArg

// file is the flag.Value behind the -f flag; each use adds one source file.
type file struct{}

// listFiles is the flag.Value behind the -l flag; each use adds every path
// named in a list file.
type listFiles struct{}
     88 
     89 func (f *file) String() string {
     90 	return `""`
     91 }
     92 
     93 func (f *file) Set(s string) error {
     94 	if *relativeRoot == "" {
     95 		return fmt.Errorf("must pass -C before -f or -l")
     96 	}
     97 
     98 	fArgs = append(fArgs, fileArg{
     99 		pathPrefixInZip:     filepath.Clean(*rootPrefix),
    100 		sourcePrefixToStrip: filepath.Clean(*relativeRoot),
    101 		sourceFiles:         []string{s},
    102 	})
    103 
    104 	return nil
    105 }
    106 
    107 func (l *listFiles) String() string {
    108 	return `""`
    109 }
    110 
    111 func (l *listFiles) Set(s string) error {
    112 	if *relativeRoot == "" {
    113 		return fmt.Errorf("must pass -C before -f or -l")
    114 	}
    115 
    116 	list, err := ioutil.ReadFile(s)
    117 	if err != nil {
    118 		return err
    119 	}
    120 
    121 	fArgs = append(fArgs, fileArg{
    122 		pathPrefixInZip:     filepath.Clean(*rootPrefix),
    123 		sourcePrefixToStrip: filepath.Clean(*relativeRoot),
    124 		sourceFiles:         strings.Split(string(list), "\n"),
    125 	})
    126 
    127 	return nil
    128 }
    129 
var (
	// Command-line flags controlling the output and how inputs are mapped
	// into it.
	out          = flag.String("o", "", "file to write zip file to")
	manifest     = flag.String("m", "", "input jar manifest file name")
	directories  = flag.Bool("d", false, "include directories in zip")
	rootPrefix   = flag.String("P", "", "path prefix within the zip at which to place files")
	relativeRoot = flag.String("C", "", "path to use as relative root of files in next -f or -l argument")
	parallelJobs = flag.Int("j", runtime.NumCPU(), "number of parallel threads to use")
	compLevel    = flag.Int("L", 5, "deflate compression level (0-9)")

	// fArgs is appended to by every -f and -l flag; nonDeflatedFiles holds
	// the paths given with -s, which are stored without compression.
	fArgs            fileArgs
	nonDeflatedFiles = make(uniqueSet)

	// Optional profiling outputs.
	cpuProfile = flag.String("cpuprofile", "", "write cpu profile to file")
	traceFile  = flag.String("trace", "", "write trace to file")
)
    145 
    146 func init() {
    147 	flag.Var(&listFiles{}, "l", "file containing list of .class files")
    148 	flag.Var(&file{}, "f", "file to include in zip")
    149 	flag.Var(&nonDeflatedFiles, "s", "file path to be stored within the zip without compression")
    150 }
    151 
    152 func usage() {
    153 	fmt.Fprintf(os.Stderr, "usage: soong_zip -o zipfile [-m manifest] -C dir [-f|-l file]...\n")
    154 	flag.PrintDefaults()
    155 	os.Exit(2)
    156 }
    157 
// zipWriter holds the state shared between the goroutines that read and
// compress input files and the loop in write that serializes the results
// into the output zip.
type zipWriter struct {
	// time is the modification time stamped on every entry.
	time        time.Time
	// createdDirs records directory entries that have already been queued so
	// each is emitted at most once.
	createdDirs map[string]bool
	// directories enables emitting directory entries (the -d flag).
	directories bool

	// errors carries failures from worker goroutines to the write loop.
	errors   chan error
	// writeOps is the ordered queue of pending entries; each element is a
	// channel that will deliver the entry once its header is ready.
	writeOps chan chan *zipEntry

	// rateLimit bounds parallel compressions and outstanding buffers.
	rateLimit *RateLimit

	// compressorPool caches *flate.Writer values for reuse; compLevel is the
	// deflate level they are created with.
	compressorPool sync.Pool
	compLevel      int
}
    171 
// zipEntry is one entry ready to be written to the zip: its header plus the
// (possibly still pending) data that follows it.
type zipEntry struct {
	fh *zip.FileHeader

	// futureReaders delivers, in order, one channel per block of entry data;
	// each inner channel yields the io.Reader for that block once it is
	// available. It is nil for entries without data (directories).
	futureReaders chan chan io.Reader
}
    178 
    179 func main() {
    180 	flag.Parse()
    181 
    182 	if *cpuProfile != "" {
    183 		f, err := os.Create(*cpuProfile)
    184 		if err != nil {
    185 			fmt.Fprintln(os.Stderr, err.Error())
    186 			os.Exit(1)
    187 		}
    188 		defer f.Close()
    189 		pprof.StartCPUProfile(f)
    190 		defer pprof.StopCPUProfile()
    191 	}
    192 
    193 	if *traceFile != "" {
    194 		f, err := os.Create(*traceFile)
    195 		if err != nil {
    196 			fmt.Fprintln(os.Stderr, err.Error())
    197 			os.Exit(1)
    198 		}
    199 		defer f.Close()
    200 		err = trace.Start(f)
    201 		if err != nil {
    202 			fmt.Fprintln(os.Stderr, err.Error())
    203 			os.Exit(1)
    204 		}
    205 		defer trace.Stop()
    206 	}
    207 
    208 	if *out == "" {
    209 		fmt.Fprintf(os.Stderr, "error: -o is required\n")
    210 		usage()
    211 	}
    212 
    213 	w := &zipWriter{
    214 		time:        time.Date(2009, 1, 1, 0, 0, 0, 0, time.UTC),
    215 		createdDirs: make(map[string]bool),
    216 		directories: *directories,
    217 		compLevel:   *compLevel,
    218 	}
    219 
    220 	pathMappings := []pathMapping{}
    221 	set := make(map[string]string)
    222 
    223 	for _, fa := range fArgs {
    224 		for _, src := range fa.sourceFiles {
    225 			if err := fillPathPairs(fa.pathPrefixInZip,
    226 				fa.sourcePrefixToStrip, src, set, &pathMappings); err != nil {
    227 				log.Fatal(err)
    228 			}
    229 		}
    230 	}
    231 
    232 	err := w.write(*out, pathMappings, *manifest)
    233 	if err != nil {
    234 		fmt.Fprintln(os.Stderr, err.Error())
    235 		os.Exit(1)
    236 	}
    237 }
    238 
    239 func fillPathPairs(prefix, rel, src string, set map[string]string, pathMappings *[]pathMapping) error {
    240 	src = strings.TrimSpace(src)
    241 	if src == "" {
    242 		return nil
    243 	}
    244 	src = filepath.Clean(src)
    245 	dest, err := filepath.Rel(rel, src)
    246 	if err != nil {
    247 		return err
    248 	}
    249 	dest = filepath.Join(prefix, dest)
    250 
    251 	if _, found := set[dest]; found {
    252 		return fmt.Errorf("found two file paths to be copied into dest path: %q,"+
    253 			" both [%q]%q and [%q]%q!",
    254 			dest, dest, src, dest, set[dest])
    255 	} else {
    256 		set[dest] = src
    257 	}
    258 
    259 	zipMethod := zip.Deflate
    260 	if _, found := nonDeflatedFiles[dest]; found {
    261 		zipMethod = zip.Store
    262 	}
    263 	*pathMappings = append(*pathMappings,
    264 		pathMapping{dest: dest, src: src, zipMethod: zipMethod})
    265 
    266 	return nil
    267 }
    268 
    269 func (z *zipWriter) write(out string, pathMappings []pathMapping, manifest string) error {
    270 	f, err := os.Create(out)
    271 	if err != nil {
    272 		return err
    273 	}
    274 
    275 	defer f.Close()
    276 	defer func() {
    277 		if err != nil {
    278 			os.Remove(out)
    279 		}
    280 	}()
    281 
    282 	z.errors = make(chan error)
    283 	defer close(z.errors)
    284 
    285 	// This channel size can be essentially unlimited -- it's used as a fifo
    286 	// queue decouple the CPU and IO loads. Directories don't require any
    287 	// compression time, but still cost some IO. Similar with small files that
    288 	// can be very fast to compress. Some files that are more difficult to
    289 	// compress won't take a corresponding longer time writing out.
    290 	//
    291 	// The optimum size here depends on your CPU and IO characteristics, and
    292 	// the the layout of your zip file. 1000 was chosen mostly at random as
    293 	// something that worked reasonably well for a test file.
    294 	//
    295 	// The RateLimit object will put the upper bounds on the number of
    296 	// parallel compressions and outstanding buffers.
    297 	z.writeOps = make(chan chan *zipEntry, 1000)
    298 	z.rateLimit = NewRateLimit(*parallelJobs, 0)
    299 	defer z.rateLimit.Stop()
    300 
    301 	go func() {
    302 		var err error
    303 		defer close(z.writeOps)
    304 
    305 		for _, ele := range pathMappings {
    306 			err = z.writeFile(ele.dest, ele.src, ele.zipMethod)
    307 			if err != nil {
    308 				z.errors <- err
    309 				return
    310 			}
    311 		}
    312 
    313 		if manifest != "" {
    314 			err = z.writeFile("META-INF/MANIFEST.MF", manifest, zip.Deflate)
    315 			if err != nil {
    316 				z.errors <- err
    317 				return
    318 			}
    319 		}
    320 	}()
    321 
    322 	zipw := zip.NewWriter(f)
    323 
    324 	var currentWriteOpChan chan *zipEntry
    325 	var currentWriter io.WriteCloser
    326 	var currentReaders chan chan io.Reader
    327 	var currentReader chan io.Reader
    328 	var done bool
    329 
    330 	for !done {
    331 		var writeOpsChan chan chan *zipEntry
    332 		var writeOpChan chan *zipEntry
    333 		var readersChan chan chan io.Reader
    334 
    335 		if currentReader != nil {
    336 			// Only read and process errors
    337 		} else if currentReaders != nil {
    338 			readersChan = currentReaders
    339 		} else if currentWriteOpChan != nil {
    340 			writeOpChan = currentWriteOpChan
    341 		} else {
    342 			writeOpsChan = z.writeOps
    343 		}
    344 
    345 		select {
    346 		case writeOp, ok := <-writeOpsChan:
    347 			if !ok {
    348 				done = true
    349 			}
    350 
    351 			currentWriteOpChan = writeOp
    352 
    353 		case op := <-writeOpChan:
    354 			currentWriteOpChan = nil
    355 
    356 			if op.fh.Method == zip.Deflate {
    357 				currentWriter, err = zipw.CreateCompressedHeader(op.fh)
    358 			} else {
    359 				var zw io.Writer
    360 				zw, err = zipw.CreateHeader(op.fh)
    361 				currentWriter = nopCloser{zw}
    362 			}
    363 			if err != nil {
    364 				return err
    365 			}
    366 
    367 			currentReaders = op.futureReaders
    368 			if op.futureReaders == nil {
    369 				currentWriter.Close()
    370 				currentWriter = nil
    371 			}
    372 
    373 		case futureReader, ok := <-readersChan:
    374 			if !ok {
    375 				// Done with reading
    376 				currentWriter.Close()
    377 				currentWriter = nil
    378 				currentReaders = nil
    379 			}
    380 
    381 			currentReader = futureReader
    382 
    383 		case reader := <-currentReader:
    384 			var count int64
    385 			count, err = io.Copy(currentWriter, reader)
    386 			if err != nil {
    387 				return err
    388 			}
    389 			z.rateLimit.Release(int(count))
    390 
    391 			currentReader = nil
    392 
    393 		case err = <-z.errors:
    394 			return err
    395 		}
    396 	}
    397 
    398 	// One last chance to catch an error
    399 	select {
    400 	case err = <-z.errors:
    401 		return err
    402 	default:
    403 		zipw.Close()
    404 		return nil
    405 	}
    406 }
    407 
    408 func (z *zipWriter) writeFile(dest, src string, method uint16) error {
    409 	var fileSize int64
    410 	var executable bool
    411 
    412 	if s, err := os.Lstat(src); err != nil {
    413 		return err
    414 	} else if s.IsDir() {
    415 		if z.directories {
    416 			return z.writeDirectory(dest)
    417 		}
    418 		return nil
    419 	} else if s.Mode()&os.ModeSymlink != 0 {
    420 		return z.writeSymlink(dest, src)
    421 	} else if !s.Mode().IsRegular() {
    422 		return fmt.Errorf("%s is not a file, directory, or symlink", src)
    423 	} else {
    424 		fileSize = s.Size()
    425 		executable = s.Mode()&0100 != 0
    426 	}
    427 
    428 	if z.directories {
    429 		dir, _ := filepath.Split(dest)
    430 		err := z.writeDirectory(dir)
    431 		if err != nil {
    432 			return err
    433 		}
    434 	}
    435 
    436 	compressChan := make(chan *zipEntry, 1)
    437 	z.writeOps <- compressChan
    438 
    439 	// Pre-fill a zipEntry, it will be sent in the compressChan once
    440 	// we're sure about the Method and CRC.
    441 	ze := &zipEntry{
    442 		fh: &zip.FileHeader{
    443 			Name:   dest,
    444 			Method: method,
    445 
    446 			UncompressedSize64: uint64(fileSize),
    447 		},
    448 	}
    449 	ze.fh.SetModTime(z.time)
    450 	if executable {
    451 		ze.fh.SetMode(0700)
    452 	}
    453 
    454 	r, err := os.Open(src)
    455 	if err != nil {
    456 		return err
    457 	}
    458 
    459 	exec := z.rateLimit.RequestExecution()
    460 
    461 	if method == zip.Deflate && fileSize >= minParallelFileSize {
    462 		wg := new(sync.WaitGroup)
    463 
    464 		// Allocate enough buffer to hold all readers. We'll limit
    465 		// this based on actual buffer sizes in RateLimit.
    466 		ze.futureReaders = make(chan chan io.Reader, (fileSize/parallelBlockSize)+1)
    467 
    468 		// Calculate the CRC in the background, since reading the entire
    469 		// file could take a while.
    470 		//
    471 		// We could split this up into chuncks as well, but it's faster
    472 		// than the compression. Due to the Go Zip API, we also need to
    473 		// know the result before we can begin writing the compressed
    474 		// data out to the zipfile.
    475 		wg.Add(1)
    476 		go z.crcFile(r, ze, exec, compressChan, wg)
    477 
    478 		for start := int64(0); start < fileSize; start += parallelBlockSize {
    479 			sr := io.NewSectionReader(r, start, parallelBlockSize)
    480 			resultChan := make(chan io.Reader, 1)
    481 			ze.futureReaders <- resultChan
    482 
    483 			exec := z.rateLimit.RequestExecution()
    484 
    485 			last := !(start+parallelBlockSize < fileSize)
    486 			var dict []byte
    487 			if start >= windowSize {
    488 				dict, err = ioutil.ReadAll(io.NewSectionReader(r, start-windowSize, windowSize))
    489 			}
    490 
    491 			wg.Add(1)
    492 			go z.compressPartialFile(sr, dict, last, exec, resultChan, wg)
    493 		}
    494 
    495 		close(ze.futureReaders)
    496 
    497 		// Close the file handle after all readers are done
    498 		go func(wg *sync.WaitGroup, f *os.File) {
    499 			wg.Wait()
    500 			f.Close()
    501 		}(wg, r)
    502 	} else {
    503 		go z.compressWholeFile(ze, r, exec, compressChan)
    504 	}
    505 
    506 	return nil
    507 }
    508 
    509 func (z *zipWriter) crcFile(r io.Reader, ze *zipEntry, exec Execution, resultChan chan *zipEntry, wg *sync.WaitGroup) {
    510 	defer wg.Done()
    511 	defer exec.Finish(0)
    512 
    513 	crc := crc32.NewIEEE()
    514 	_, err := io.Copy(crc, r)
    515 	if err != nil {
    516 		z.errors <- err
    517 		return
    518 	}
    519 
    520 	ze.fh.CRC32 = crc.Sum32()
    521 	resultChan <- ze
    522 	close(resultChan)
    523 }
    524 
    525 func (z *zipWriter) compressPartialFile(r io.Reader, dict []byte, last bool, exec Execution, resultChan chan io.Reader, wg *sync.WaitGroup) {
    526 	defer wg.Done()
    527 
    528 	result, err := z.compressBlock(r, dict, last)
    529 	if err != nil {
    530 		z.errors <- err
    531 		return
    532 	}
    533 
    534 	exec.Finish(result.Len())
    535 	resultChan <- result
    536 }
    537 
    538 func (z *zipWriter) compressBlock(r io.Reader, dict []byte, last bool) (*bytes.Buffer, error) {
    539 	buf := new(bytes.Buffer)
    540 	var fw *flate.Writer
    541 	var err error
    542 	if len(dict) > 0 {
    543 		// There's no way to Reset a Writer with a new dictionary, so
    544 		// don't use the Pool
    545 		fw, err = flate.NewWriterDict(buf, z.compLevel, dict)
    546 	} else {
    547 		var ok bool
    548 		if fw, ok = z.compressorPool.Get().(*flate.Writer); ok {
    549 			fw.Reset(buf)
    550 		} else {
    551 			fw, err = flate.NewWriter(buf, z.compLevel)
    552 		}
    553 		defer z.compressorPool.Put(fw)
    554 	}
    555 	if err != nil {
    556 		return nil, err
    557 	}
    558 
    559 	_, err = io.Copy(fw, r)
    560 	if err != nil {
    561 		return nil, err
    562 	}
    563 	if last {
    564 		fw.Close()
    565 	} else {
    566 		fw.Flush()
    567 	}
    568 
    569 	return buf, nil
    570 }
    571 
    572 func (z *zipWriter) compressWholeFile(ze *zipEntry, r *os.File, exec Execution, compressChan chan *zipEntry) {
    573 	var bufSize int
    574 
    575 	defer r.Close()
    576 
    577 	crc := crc32.NewIEEE()
    578 	_, err := io.Copy(crc, r)
    579 	if err != nil {
    580 		z.errors <- err
    581 		return
    582 	}
    583 
    584 	ze.fh.CRC32 = crc.Sum32()
    585 
    586 	_, err = r.Seek(0, 0)
    587 	if err != nil {
    588 		z.errors <- err
    589 		return
    590 	}
    591 
    592 	readFile := func(r *os.File) ([]byte, error) {
    593 		_, err = r.Seek(0, 0)
    594 		if err != nil {
    595 			return nil, err
    596 		}
    597 
    598 		buf, err := ioutil.ReadAll(r)
    599 		if err != nil {
    600 			return nil, err
    601 		}
    602 
    603 		return buf, nil
    604 	}
    605 
    606 	ze.futureReaders = make(chan chan io.Reader, 1)
    607 	futureReader := make(chan io.Reader, 1)
    608 	ze.futureReaders <- futureReader
    609 	close(ze.futureReaders)
    610 
    611 	if ze.fh.Method == zip.Deflate {
    612 		compressed, err := z.compressBlock(r, nil, true)
    613 		if err != nil {
    614 			z.errors <- err
    615 			return
    616 		}
    617 		if uint64(compressed.Len()) < ze.fh.UncompressedSize64 {
    618 			futureReader <- compressed
    619 			bufSize = compressed.Len()
    620 		} else {
    621 			buf, err := readFile(r)
    622 			if err != nil {
    623 				z.errors <- err
    624 				return
    625 			}
    626 			ze.fh.Method = zip.Store
    627 			futureReader <- bytes.NewReader(buf)
    628 			bufSize = int(ze.fh.UncompressedSize64)
    629 		}
    630 	} else {
    631 		buf, err := readFile(r)
    632 		if err != nil {
    633 			z.errors <- err
    634 			return
    635 		}
    636 		ze.fh.Method = zip.Store
    637 		futureReader <- bytes.NewReader(buf)
    638 		bufSize = int(ze.fh.UncompressedSize64)
    639 	}
    640 
    641 	exec.Finish(bufSize)
    642 	close(futureReader)
    643 
    644 	compressChan <- ze
    645 	close(compressChan)
    646 }
    647 
    648 func (z *zipWriter) writeDirectory(dir string) error {
    649 	if dir != "" && !strings.HasSuffix(dir, "/") {
    650 		dir = dir + "/"
    651 	}
    652 
    653 	for dir != "" && dir != "./" && !z.createdDirs[dir] {
    654 		z.createdDirs[dir] = true
    655 
    656 		dirHeader := &zip.FileHeader{
    657 			Name: dir,
    658 		}
    659 		dirHeader.SetMode(0700 | os.ModeDir)
    660 		dirHeader.SetModTime(z.time)
    661 
    662 		ze := make(chan *zipEntry, 1)
    663 		ze <- &zipEntry{
    664 			fh: dirHeader,
    665 		}
    666 		close(ze)
    667 		z.writeOps <- ze
    668 
    669 		dir, _ = filepath.Split(dir)
    670 	}
    671 
    672 	return nil
    673 }
    674 
    675 func (z *zipWriter) writeSymlink(rel, file string) error {
    676 	if z.directories {
    677 		dir, _ := filepath.Split(rel)
    678 		if err := z.writeDirectory(dir); err != nil {
    679 			return err
    680 		}
    681 	}
    682 
    683 	fileHeader := &zip.FileHeader{
    684 		Name: rel,
    685 	}
    686 	fileHeader.SetModTime(z.time)
    687 	fileHeader.SetMode(0700 | os.ModeSymlink)
    688 
    689 	dest, err := os.Readlink(file)
    690 	if err != nil {
    691 		return err
    692 	}
    693 
    694 	ze := make(chan *zipEntry, 1)
    695 	futureReaders := make(chan chan io.Reader, 1)
    696 	futureReader := make(chan io.Reader, 1)
    697 	futureReaders <- futureReader
    698 	close(futureReaders)
    699 	futureReader <- bytes.NewBufferString(dest)
    700 	close(futureReader)
    701 
    702 	// We didn't ask permission to execute, since this should be very short
    703 	// but we still need to increment the outstanding buffer sizes, since
    704 	// the read will decrement the buffer size.
    705 	z.rateLimit.Release(-len(dest))
    706 
    707 	ze <- &zipEntry{
    708 		fh:            fileHeader,
    709 		futureReaders: futureReaders,
    710 	}
    711 	close(ze)
    712 	z.writeOps <- ze
    713 
    714 	return nil
    715 }
    716