Home | History | Annotate | Download | only in merge_zips
      1 // Copyright 2017 Google Inc. All rights reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 package main
     16 
     17 import (
     18 	"errors"
     19 	"flag"
     20 	"fmt"
     21 	"hash/crc32"
     22 	"io"
     23 	"io/ioutil"
     24 	"log"
     25 	"os"
     26 	"path/filepath"
     27 	"sort"
     28 
     29 	"github.com/google/blueprint/pathtools"
     30 
     31 	"android/soong/jar"
     32 	"android/soong/third_party/zip"
     33 )
     34 
     35 type fileList []string
     36 
     37 func (f *fileList) String() string {
     38 	return `""`
     39 }
     40 
     41 func (f *fileList) Set(name string) error {
     42 	*f = append(*f, filepath.Clean(name))
     43 
     44 	return nil
     45 }
     46 
     47 type zipsToNotStripSet map[string]bool
     48 
     49 func (s zipsToNotStripSet) String() string {
     50 	return `""`
     51 }
     52 
     53 func (s zipsToNotStripSet) Set(zip_path string) error {
     54 	s[zip_path] = true
     55 
     56 	return nil
     57 }
     58 
     59 var (
     60 	sortEntries      = flag.Bool("s", false, "sort entries (defaults to the order from the input zip files)")
     61 	emulateJar       = flag.Bool("j", false, "sort zip entries using jar ordering (META-INF first)")
     62 	emulatePar       = flag.Bool("p", false, "merge zip entries based on par format")
     63 	stripDirs        fileList
     64 	stripFiles       fileList
     65 	zipsToNotStrip   = make(zipsToNotStripSet)
     66 	stripDirEntries  = flag.Bool("D", false, "strip directory entries from the output zip file")
     67 	manifest         = flag.String("m", "", "manifest file to insert in jar")
     68 	pyMain           = flag.String("pm", "", "__main__.py file to insert in par")
     69 	prefix           = flag.String("prefix", "", "A file to prefix to the zip file")
     70 	ignoreDuplicates = flag.Bool("ignore-duplicates", false, "take each entry from the first zip it exists in and don't warn")
     71 )
     72 
     73 func init() {
     74 	flag.Var(&stripDirs, "stripDir", "directories to be excluded from the output zip, accepts wildcards")
     75 	flag.Var(&stripFiles, "stripFile", "files to be excluded from the output zip, accepts wildcards")
     76 	flag.Var(&zipsToNotStrip, "zipToNotStrip", "the input zip file which is not applicable for stripping")
     77 }
     78 
     79 func main() {
     80 	flag.Usage = func() {
     81 		fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [--prefix script] [-pm __main__.py] output [inputs...]")
     82 		flag.PrintDefaults()
     83 	}
     84 
     85 	// parse args
     86 	flag.Parse()
     87 	args := flag.Args()
     88 	if len(args) < 1 {
     89 		flag.Usage()
     90 		os.Exit(1)
     91 	}
     92 	outputPath := args[0]
     93 	inputs := args[1:]
     94 
     95 	log.SetFlags(log.Lshortfile)
     96 
     97 	// make writer
     98 	output, err := os.Create(outputPath)
     99 	if err != nil {
    100 		log.Fatal(err)
    101 	}
    102 	defer output.Close()
    103 
    104 	var offset int64
    105 	if *prefix != "" {
    106 		prefixFile, err := os.Open(*prefix)
    107 		if err != nil {
    108 			log.Fatal(err)
    109 		}
    110 		offset, err = io.Copy(output, prefixFile)
    111 		if err != nil {
    112 			log.Fatal(err)
    113 		}
    114 	}
    115 
    116 	writer := zip.NewWriter(output)
    117 	defer func() {
    118 		err := writer.Close()
    119 		if err != nil {
    120 			log.Fatal(err)
    121 		}
    122 	}()
    123 	writer.SetOffset(offset)
    124 
    125 	// make readers
    126 	readers := []namedZipReader{}
    127 	for _, input := range inputs {
    128 		reader, err := zip.OpenReader(input)
    129 		if err != nil {
    130 			log.Fatal(err)
    131 		}
    132 		defer reader.Close()
    133 		namedReader := namedZipReader{path: input, reader: &reader.Reader}
    134 		readers = append(readers, namedReader)
    135 	}
    136 
    137 	if *manifest != "" && !*emulateJar {
    138 		log.Fatal(errors.New("must specify -j when specifying a manifest via -m"))
    139 	}
    140 
    141 	if *pyMain != "" && !*emulatePar {
    142 		log.Fatal(errors.New("must specify -p when specifying a Python __main__.py via -pm"))
    143 	}
    144 
    145 	// do merge
    146 	err = mergeZips(readers, writer, *manifest, *pyMain, *sortEntries, *emulateJar, *emulatePar,
    147 		*stripDirEntries, *ignoreDuplicates, []string(stripFiles), []string(stripDirs), map[string]bool(zipsToNotStrip))
    148 	if err != nil {
    149 		log.Fatal(err)
    150 	}
    151 }
    152 
// A namedZipReader pairs a zip reader with the path of the zip file it reads,
// so that code handling an entry can say which file the entry came from.
type namedZipReader struct {
	path   string      // path of the zip file being read
	reader *zip.Reader // reader over that zip file's entries
}
    158 
    159 // a zipEntryPath refers to a file contained in a zip
    160 type zipEntryPath struct {
    161 	zipName   string
    162 	entryName string
    163 }
    164 
    165 func (p zipEntryPath) String() string {
    166 	return p.zipName + "/" + p.entryName
    167 }
    168 
    169 // a zipEntry is a zipSource that pulls its content from another zip
    170 type zipEntry struct {
    171 	path    zipEntryPath
    172 	content *zip.File
    173 }
    174 
    175 func (ze zipEntry) String() string {
    176 	return ze.path.String()
    177 }
    178 
    179 func (ze zipEntry) IsDir() bool {
    180 	return ze.content.FileInfo().IsDir()
    181 }
    182 
    183 func (ze zipEntry) CRC32() uint32 {
    184 	return ze.content.FileHeader.CRC32
    185 }
    186 
    187 func (ze zipEntry) Size() uint64 {
    188 	return ze.content.FileHeader.UncompressedSize64
    189 }
    190 
    191 func (ze zipEntry) WriteToZip(dest string, zw *zip.Writer) error {
    192 	return zw.CopyFrom(ze.content, dest)
    193 }
    194 
    195 // a bufferEntry is a zipSource that pulls its content from a []byte
    196 type bufferEntry struct {
    197 	fh      *zip.FileHeader
    198 	content []byte
    199 }
    200 
    201 func (be bufferEntry) String() string {
    202 	return "internal buffer"
    203 }
    204 
    205 func (be bufferEntry) IsDir() bool {
    206 	return be.fh.FileInfo().IsDir()
    207 }
    208 
    209 func (be bufferEntry) CRC32() uint32 {
    210 	return crc32.ChecksumIEEE(be.content)
    211 }
    212 
    213 func (be bufferEntry) Size() uint64 {
    214 	return uint64(len(be.content))
    215 }
    216 
    217 func (be bufferEntry) WriteToZip(dest string, zw *zip.Writer) error {
    218 	w, err := zw.CreateHeader(be.fh)
    219 	if err != nil {
    220 		return err
    221 	}
    222 
    223 	if !be.IsDir() {
    224 		_, err = w.Write(be.content)
    225 		if err != nil {
    226 			return err
    227 		}
    228 	}
    229 
    230 	return nil
    231 }
    232 
// A zipSource is something that can supply one entry of an output zip file.
type zipSource interface {
	// String describes the source, for use in messages.
	String() string
	// IsDir reports whether the entry is a directory.
	IsDir() bool
	// CRC32 returns the CRC-32 checksum of the entry's content.
	CRC32() uint32
	// Size returns the size in bytes of the entry's uncompressed content.
	Size() uint64
	// WriteToZip writes the entry to zw; dest is the path it is destined for.
	WriteToZip(dest string, zw *zip.Writer) error
}
    240 
// A fileMapping says that the entry supplied by source is to be placed at the
// path dest.
type fileMapping struct {
	dest   string    // path the entry is destined for
	source zipSource // where the entry's content comes from
}
    246 
    247 func mergeZips(readers []namedZipReader, writer *zip.Writer, manifest, pyMain string,
    248 	sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool,
    249 	stripFiles, stripDirs []string, zipsToNotStrip map[string]bool) error {
    250 
    251 	sourceByDest := make(map[string]zipSource, 0)
    252 	orderedMappings := []fileMapping{}
    253 
    254 	// if dest already exists returns a non-null zipSource for the existing source
    255 	addMapping := func(dest string, source zipSource) zipSource {
    256 		mapKey := filepath.Clean(dest)
    257 		if existingSource, exists := sourceByDest[mapKey]; exists {
    258 			return existingSource
    259 		}
    260 
    261 		sourceByDest[mapKey] = source
    262 		orderedMappings = append(orderedMappings, fileMapping{source: source, dest: dest})
    263 		return nil
    264 	}
    265 
    266 	if manifest != "" {
    267 		if !stripDirEntries {
    268 			dirHeader := jar.MetaDirFileHeader()
    269 			dirSource := bufferEntry{dirHeader, nil}
    270 			addMapping(jar.MetaDir, dirSource)
    271 		}
    272 
    273 		contents, err := ioutil.ReadFile(manifest)
    274 		if err != nil {
    275 			return err
    276 		}
    277 
    278 		fh, buf, err := jar.ManifestFileContents(contents)
    279 		if err != nil {
    280 			return err
    281 		}
    282 
    283 		fileSource := bufferEntry{fh, buf}
    284 		addMapping(jar.ManifestFile, fileSource)
    285 	}
    286 
    287 	if pyMain != "" {
    288 		buf, err := ioutil.ReadFile(pyMain)
    289 		if err != nil {
    290 			return err
    291 		}
    292 		fh := &zip.FileHeader{
    293 			Name:               "__main__.py",
    294 			Method:             zip.Store,
    295 			UncompressedSize64: uint64(len(buf)),
    296 		}
    297 		fh.SetMode(0700)
    298 		fh.SetModTime(jar.DefaultTime)
    299 		fileSource := bufferEntry{fh, buf}
    300 		addMapping("__main__.py", fileSource)
    301 	}
    302 
    303 	if emulatePar {
    304 		// the runfiles packages needs to be populated with "__init__.py".
    305 		newPyPkgs := []string{}
    306 		// the runfiles dirs have been treated as packages.
    307 		existingPyPkgSet := make(map[string]bool)
    308 		// put existing __init__.py files to a set first. This set is used for preventing
    309 		// generated __init__.py files from overwriting existing ones.
    310 		for _, namedReader := range readers {
    311 			for _, file := range namedReader.reader.File {
    312 				if filepath.Base(file.Name) != "__init__.py" {
    313 					continue
    314 				}
    315 				pyPkg := pathBeforeLastSlash(file.Name)
    316 				if _, found := existingPyPkgSet[pyPkg]; found {
    317 					panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q.", file.Name))
    318 				} else {
    319 					existingPyPkgSet[pyPkg] = true
    320 				}
    321 			}
    322 		}
    323 		for _, namedReader := range readers {
    324 			for _, file := range namedReader.reader.File {
    325 				var parentPath string /* the path after trimming last "/" */
    326 				if filepath.Base(file.Name) == "__init__.py" {
    327 					// for existing __init__.py files, we should trim last "/" for twice.
    328 					// eg. a/b/c/__init__.py ---> a/b
    329 					parentPath = pathBeforeLastSlash(pathBeforeLastSlash(file.Name))
    330 				} else {
    331 					parentPath = pathBeforeLastSlash(file.Name)
    332 				}
    333 				populateNewPyPkgs(parentPath, existingPyPkgSet, &newPyPkgs)
    334 			}
    335 		}
    336 		for _, pkg := range newPyPkgs {
    337 			var emptyBuf []byte
    338 			fh := &zip.FileHeader{
    339 				Name:               filepath.Join(pkg, "__init__.py"),
    340 				Method:             zip.Store,
    341 				UncompressedSize64: uint64(len(emptyBuf)),
    342 			}
    343 			fh.SetMode(0700)
    344 			fh.SetModTime(jar.DefaultTime)
    345 			fileSource := bufferEntry{fh, emptyBuf}
    346 			addMapping(filepath.Join(pkg, "__init__.py"), fileSource)
    347 		}
    348 	}
    349 	for _, namedReader := range readers {
    350 		_, skipStripThisZip := zipsToNotStrip[namedReader.path]
    351 		for _, file := range namedReader.reader.File {
    352 			if !skipStripThisZip {
    353 				if skip, err := shouldStripEntry(emulateJar, stripFiles, stripDirs, file.Name); err != nil {
    354 					return err
    355 				} else if skip {
    356 					continue
    357 				}
    358 			}
    359 
    360 			if stripDirEntries && file.FileInfo().IsDir() {
    361 				continue
    362 			}
    363 
    364 			// check for other files or directories destined for the same path
    365 			dest := file.Name
    366 
    367 			// make a new entry to add
    368 			source := zipEntry{path: zipEntryPath{zipName: namedReader.path, entryName: file.Name}, content: file}
    369 
    370 			if existingSource := addMapping(dest, source); existingSource != nil {
    371 				// handle duplicates
    372 				if existingSource.IsDir() != source.IsDir() {
    373 					return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n",
    374 						dest, existingSource, source)
    375 				}
    376 
    377 				if ignoreDuplicates {
    378 					continue
    379 				}
    380 
    381 				if emulateJar &&
    382 					file.Name == jar.ManifestFile || file.Name == jar.ModuleInfoClass {
    383 					// Skip manifest and module info files that are not from the first input file
    384 					continue
    385 				}
    386 
    387 				if source.IsDir() {
    388 					continue
    389 				}
    390 
    391 				if existingSource.CRC32() == source.CRC32() && existingSource.Size() == source.Size() {
    392 					continue
    393 				}
    394 
    395 				return fmt.Errorf("Duplicate path %v found in %v and %v\n",
    396 					dest, existingSource, source)
    397 			}
    398 		}
    399 	}
    400 
    401 	if emulateJar {
    402 		jarSort(orderedMappings)
    403 	} else if sortEntries {
    404 		alphanumericSort(orderedMappings)
    405 	}
    406 
    407 	for _, entry := range orderedMappings {
    408 		if err := entry.source.WriteToZip(entry.dest, writer); err != nil {
    409 			return err
    410 		}
    411 	}
    412 
    413 	return nil
    414 }
    415 
    416 // Sets the given directory and all its ancestor directories as Python packages.
    417 func populateNewPyPkgs(pkgPath string, existingPyPkgSet map[string]bool, newPyPkgs *[]string) {
    418 	for pkgPath != "" {
    419 		if _, found := existingPyPkgSet[pkgPath]; !found {
    420 			existingPyPkgSet[pkgPath] = true
    421 			*newPyPkgs = append(*newPyPkgs, pkgPath)
    422 			// Gets its ancestor directory by trimming last slash.
    423 			pkgPath = pathBeforeLastSlash(pkgPath)
    424 		} else {
    425 			break
    426 		}
    427 	}
    428 }
    429 
    430 func pathBeforeLastSlash(path string) string {
    431 	ret := filepath.Dir(path)
    432 	// filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/".
    433 	if ret == "." || ret == "/" {
    434 		return ""
    435 	}
    436 	return ret
    437 }
    438 
    439 func shouldStripEntry(emulateJar bool, stripFiles, stripDirs []string, name string) (bool, error) {
    440 	for _, dir := range stripDirs {
    441 		dir = filepath.Clean(dir)
    442 		patterns := []string{
    443 			dir + "/",      // the directory itself
    444 			dir + "/**/*",  // files recursively in the directory
    445 			dir + "/**/*/", // directories recursively in the directory
    446 		}
    447 
    448 		for _, pattern := range patterns {
    449 			match, err := pathtools.Match(pattern, name)
    450 			if err != nil {
    451 				return false, fmt.Errorf("%s: %s", err.Error(), pattern)
    452 			} else if match {
    453 				if emulateJar {
    454 					// When merging jar files, don't strip META-INF/MANIFEST.MF even if stripping META-INF is
    455 					// requested.
    456 					// TODO(ccross): which files does this affect?
    457 					if name != jar.MetaDir && name != jar.ManifestFile {
    458 						return true, nil
    459 					}
    460 				}
    461 				return true, nil
    462 			}
    463 		}
    464 	}
    465 
    466 	for _, pattern := range stripFiles {
    467 		if match, err := pathtools.Match(pattern, name); err != nil {
    468 			return false, fmt.Errorf("%s: %s", err.Error(), pattern)
    469 		} else if match {
    470 			return true, nil
    471 		}
    472 	}
    473 	return false, nil
    474 }
    475 
    476 func jarSort(files []fileMapping) {
    477 	sort.SliceStable(files, func(i, j int) bool {
    478 		return jar.EntryNamesLess(files[i].dest, files[j].dest)
    479 	})
    480 }
    481 
    482 func alphanumericSort(files []fileMapping) {
    483 	sort.SliceStable(files, func(i, j int) bool {
    484 		return files[i].dest < files[j].dest
    485 	})
    486 }
    487