Home | History | Annotate | Download | only in merge_zips
      1 // Copyright 2017 Google Inc. All rights reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 package main
     16 
     17 import (
     18 	"errors"
     19 	"flag"
     20 	"fmt"
     21 	"hash/crc32"
     22 	"io/ioutil"
     23 	"log"
     24 	"os"
     25 	"path/filepath"
     26 	"sort"
     27 	"strings"
     28 
     29 	"android/soong/jar"
     30 	"android/soong/third_party/zip"
     31 )
     32 
     33 type fileList []string
     34 
     35 func (f *fileList) String() string {
     36 	return `""`
     37 }
     38 
     39 func (f *fileList) Set(name string) error {
     40 	*f = append(*f, filepath.Clean(name))
     41 
     42 	return nil
     43 }
     44 
     45 type zipsToNotStripSet map[string]bool
     46 
     47 func (s zipsToNotStripSet) String() string {
     48 	return `""`
     49 }
     50 
     51 func (s zipsToNotStripSet) Set(zip_path string) error {
     52 	s[zip_path] = true
     53 
     54 	return nil
     55 }
     56 
     57 var (
     58 	sortEntries      = flag.Bool("s", false, "sort entries (defaults to the order from the input zip files)")
     59 	emulateJar       = flag.Bool("j", false, "sort zip entries using jar ordering (META-INF first)")
     60 	emulatePar       = flag.Bool("p", false, "merge zip entries based on par format")
     61 	stripDirs        fileList
     62 	stripFiles       fileList
     63 	zipsToNotStrip   = make(zipsToNotStripSet)
     64 	stripDirEntries  = flag.Bool("D", false, "strip directory entries from the output zip file")
     65 	manifest         = flag.String("m", "", "manifest file to insert in jar")
     66 	entrypoint       = flag.String("e", "", "par entrypoint file to insert in par")
     67 	ignoreDuplicates = flag.Bool("ignore-duplicates", false, "take each entry from the first zip it exists in and don't warn")
     68 )
     69 
     70 func init() {
     71 	flag.Var(&stripDirs, "stripDir", "the prefix of file path to be excluded from the output zip")
     72 	flag.Var(&stripFiles, "stripFile", "filenames to be excluded from the output zip, accepts wildcards")
     73 	flag.Var(&zipsToNotStrip, "zipToNotStrip", "the input zip file which is not applicable for stripping")
     74 }
     75 
     76 func main() {
     77 	flag.Usage = func() {
     78 		fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [-e entrypoint] output [inputs...]")
     79 		flag.PrintDefaults()
     80 	}
     81 
     82 	// parse args
     83 	flag.Parse()
     84 	args := flag.Args()
     85 	if len(args) < 1 {
     86 		flag.Usage()
     87 		os.Exit(1)
     88 	}
     89 	outputPath := args[0]
     90 	inputs := args[1:]
     91 
     92 	log.SetFlags(log.Lshortfile)
     93 
     94 	// make writer
     95 	output, err := os.Create(outputPath)
     96 	if err != nil {
     97 		log.Fatal(err)
     98 	}
     99 	defer output.Close()
    100 	writer := zip.NewWriter(output)
    101 	defer func() {
    102 		err := writer.Close()
    103 		if err != nil {
    104 			log.Fatal(err)
    105 		}
    106 	}()
    107 
    108 	// make readers
    109 	readers := []namedZipReader{}
    110 	for _, input := range inputs {
    111 		reader, err := zip.OpenReader(input)
    112 		if err != nil {
    113 			log.Fatal(err)
    114 		}
    115 		defer reader.Close()
    116 		namedReader := namedZipReader{path: input, reader: reader}
    117 		readers = append(readers, namedReader)
    118 	}
    119 
    120 	if *manifest != "" && !*emulateJar {
    121 		log.Fatal(errors.New("must specify -j when specifying a manifest via -m"))
    122 	}
    123 
    124 	if *entrypoint != "" && !*emulatePar {
    125 		log.Fatal(errors.New("must specify -p when specifying a entrypoint via -e"))
    126 	}
    127 
    128 	// do merge
    129 	err = mergeZips(readers, writer, *manifest, *entrypoint, *sortEntries, *emulateJar, *emulatePar,
    130 		*stripDirEntries, *ignoreDuplicates)
    131 	if err != nil {
    132 		log.Fatal(err)
    133 	}
    134 }
    135 
// a namedZipReader pairs an open zip reader with the path it was opened from,
// so that entries read from it can be attributed to the originating file
type namedZipReader struct {
	path   string          // path of the zip file, as supplied when it was opened
	reader *zip.ReadCloser // open reader over the zip file at path
}
    141 
    142 // a zipEntryPath refers to a file contained in a zip
    143 type zipEntryPath struct {
    144 	zipName   string
    145 	entryName string
    146 }
    147 
    148 func (p zipEntryPath) String() string {
    149 	return p.zipName + "/" + p.entryName
    150 }
    151 
    152 // a zipEntry is a zipSource that pulls its content from another zip
    153 type zipEntry struct {
    154 	path    zipEntryPath
    155 	content *zip.File
    156 }
    157 
    158 func (ze zipEntry) String() string {
    159 	return ze.path.String()
    160 }
    161 
    162 func (ze zipEntry) IsDir() bool {
    163 	return ze.content.FileInfo().IsDir()
    164 }
    165 
    166 func (ze zipEntry) CRC32() uint32 {
    167 	return ze.content.FileHeader.CRC32
    168 }
    169 
    170 func (ze zipEntry) WriteToZip(dest string, zw *zip.Writer) error {
    171 	return zw.CopyFrom(ze.content, dest)
    172 }
    173 
    174 // a bufferEntry is a zipSource that pulls its content from a []byte
    175 type bufferEntry struct {
    176 	fh      *zip.FileHeader
    177 	content []byte
    178 }
    179 
    180 func (be bufferEntry) String() string {
    181 	return "internal buffer"
    182 }
    183 
    184 func (be bufferEntry) IsDir() bool {
    185 	return be.fh.FileInfo().IsDir()
    186 }
    187 
    188 func (be bufferEntry) CRC32() uint32 {
    189 	return crc32.ChecksumIEEE(be.content)
    190 }
    191 
    192 func (be bufferEntry) WriteToZip(dest string, zw *zip.Writer) error {
    193 	w, err := zw.CreateHeader(be.fh)
    194 	if err != nil {
    195 		return err
    196 	}
    197 
    198 	if !be.IsDir() {
    199 		_, err = w.Write(be.content)
    200 		if err != nil {
    201 			return err
    202 		}
    203 	}
    204 
    205 	return nil
    206 }
    207 
// a zipSource is anything that can supply one entry of the output zip
type zipSource interface {
	// String describes the source; mergeZips includes it in duplicate and
	// mismatch messages.
	String() string
	// IsDir reports whether the source represents a directory entry.
	IsDir() bool
	// CRC32 returns the CRC-32 of the source's content.
	CRC32() uint32
	// WriteToZip writes the source into zw. dest is the entry name requested
	// by the caller.
	WriteToZip(dest string, zw *zip.Writer) error
}
    214 
// a fileMapping specifies to copy a zip entry from one place to another
type fileMapping struct {
	dest   string    // entry name in the output zip; also the key the sort helpers order by
	source zipSource // supplier of the entry that is written to dest
}
    220 
    221 func mergeZips(readers []namedZipReader, writer *zip.Writer, manifest, entrypoint string,
    222 	sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool) error {
    223 
    224 	sourceByDest := make(map[string]zipSource, 0)
    225 	orderedMappings := []fileMapping{}
    226 
    227 	// if dest already exists returns a non-null zipSource for the existing source
    228 	addMapping := func(dest string, source zipSource) zipSource {
    229 		mapKey := filepath.Clean(dest)
    230 		if existingSource, exists := sourceByDest[mapKey]; exists {
    231 			return existingSource
    232 		}
    233 
    234 		sourceByDest[mapKey] = source
    235 		orderedMappings = append(orderedMappings, fileMapping{source: source, dest: dest})
    236 		return nil
    237 	}
    238 
    239 	if manifest != "" {
    240 		if !stripDirEntries {
    241 			dirHeader := jar.MetaDirFileHeader()
    242 			dirSource := bufferEntry{dirHeader, nil}
    243 			addMapping(jar.MetaDir, dirSource)
    244 		}
    245 
    246 		fh, buf, err := jar.ManifestFileContents(manifest)
    247 		if err != nil {
    248 			return err
    249 		}
    250 
    251 		fileSource := bufferEntry{fh, buf}
    252 		addMapping(jar.ManifestFile, fileSource)
    253 	}
    254 
    255 	if entrypoint != "" {
    256 		buf, err := ioutil.ReadFile(entrypoint)
    257 		if err != nil {
    258 			return err
    259 		}
    260 		fh := &zip.FileHeader{
    261 			Name:               "entry_point.txt",
    262 			Method:             zip.Store,
    263 			UncompressedSize64: uint64(len(buf)),
    264 		}
    265 		fh.SetMode(0700)
    266 		fh.SetModTime(jar.DefaultTime)
    267 		fileSource := bufferEntry{fh, buf}
    268 		addMapping("entry_point.txt", fileSource)
    269 	}
    270 
    271 	if emulatePar {
    272 		// the runfiles packages needs to be populated with "__init__.py".
    273 		newPyPkgs := []string{}
    274 		// the runfiles dirs have been treated as packages.
    275 		existingPyPkgSet := make(map[string]bool)
    276 		// put existing __init__.py files to a set first. This set is used for preventing
    277 		// generated __init__.py files from overwriting existing ones.
    278 		for _, namedReader := range readers {
    279 			for _, file := range namedReader.reader.File {
    280 				if filepath.Base(file.Name) != "__init__.py" {
    281 					continue
    282 				}
    283 				pyPkg := pathBeforeLastSlash(file.Name)
    284 				if _, found := existingPyPkgSet[pyPkg]; found {
    285 					panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q.", file.Name))
    286 				} else {
    287 					existingPyPkgSet[pyPkg] = true
    288 				}
    289 			}
    290 		}
    291 		for _, namedReader := range readers {
    292 			for _, file := range namedReader.reader.File {
    293 				var parentPath string /* the path after trimming last "/" */
    294 				if filepath.Base(file.Name) == "__init__.py" {
    295 					// for existing __init__.py files, we should trim last "/" for twice.
    296 					// eg. a/b/c/__init__.py ---> a/b
    297 					parentPath = pathBeforeLastSlash(pathBeforeLastSlash(file.Name))
    298 				} else {
    299 					parentPath = pathBeforeLastSlash(file.Name)
    300 				}
    301 				populateNewPyPkgs(parentPath, existingPyPkgSet, &newPyPkgs)
    302 			}
    303 		}
    304 		for _, pkg := range newPyPkgs {
    305 			var emptyBuf []byte
    306 			fh := &zip.FileHeader{
    307 				Name:               filepath.Join(pkg, "__init__.py"),
    308 				Method:             zip.Store,
    309 				UncompressedSize64: uint64(len(emptyBuf)),
    310 			}
    311 			fh.SetMode(0700)
    312 			fh.SetModTime(jar.DefaultTime)
    313 			fileSource := bufferEntry{fh, emptyBuf}
    314 			addMapping(filepath.Join(pkg, "__init__.py"), fileSource)
    315 		}
    316 	}
    317 	for _, namedReader := range readers {
    318 		_, skipStripThisZip := zipsToNotStrip[namedReader.path]
    319 		for _, file := range namedReader.reader.File {
    320 			if !skipStripThisZip && shouldStripFile(emulateJar, file.Name) {
    321 				continue
    322 			}
    323 
    324 			if stripDirEntries && file.FileInfo().IsDir() {
    325 				continue
    326 			}
    327 
    328 			// check for other files or directories destined for the same path
    329 			dest := file.Name
    330 
    331 			// make a new entry to add
    332 			source := zipEntry{path: zipEntryPath{zipName: namedReader.path, entryName: file.Name}, content: file}
    333 
    334 			if existingSource := addMapping(dest, source); existingSource != nil {
    335 				// handle duplicates
    336 				if existingSource.IsDir() != source.IsDir() {
    337 					return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n",
    338 						dest, existingSource, source)
    339 				}
    340 				if ignoreDuplicates {
    341 					continue
    342 				}
    343 				if emulateJar &&
    344 					file.Name == jar.ManifestFile || file.Name == jar.ModuleInfoClass {
    345 					// Skip manifest and module info files that are not from the first input file
    346 					continue
    347 				}
    348 				if !source.IsDir() {
    349 					if emulateJar {
    350 						if existingSource.CRC32() != source.CRC32() {
    351 							fmt.Fprintf(os.Stdout, "WARNING: Duplicate path %v found in %v and %v\n",
    352 								dest, existingSource, source)
    353 						}
    354 					} else {
    355 						return fmt.Errorf("Duplicate path %v found in %v and %v\n",
    356 							dest, existingSource, source)
    357 					}
    358 				}
    359 			}
    360 		}
    361 	}
    362 
    363 	if emulateJar {
    364 		jarSort(orderedMappings)
    365 	} else if sortEntries {
    366 		alphanumericSort(orderedMappings)
    367 	}
    368 
    369 	for _, entry := range orderedMappings {
    370 		if err := entry.source.WriteToZip(entry.dest, writer); err != nil {
    371 			return err
    372 		}
    373 	}
    374 
    375 	return nil
    376 }
    377 
    378 // Sets the given directory and all its ancestor directories as Python packages.
    379 func populateNewPyPkgs(pkgPath string, existingPyPkgSet map[string]bool, newPyPkgs *[]string) {
    380 	for pkgPath != "" {
    381 		if _, found := existingPyPkgSet[pkgPath]; !found {
    382 			existingPyPkgSet[pkgPath] = true
    383 			*newPyPkgs = append(*newPyPkgs, pkgPath)
    384 			// Gets its ancestor directory by trimming last slash.
    385 			pkgPath = pathBeforeLastSlash(pkgPath)
    386 		} else {
    387 			break
    388 		}
    389 	}
    390 }
    391 
    392 func pathBeforeLastSlash(path string) string {
    393 	ret := filepath.Dir(path)
    394 	// filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/".
    395 	if ret == "." || ret == "/" {
    396 		return ""
    397 	}
    398 	return ret
    399 }
    400 
    401 func shouldStripFile(emulateJar bool, name string) bool {
    402 	for _, dir := range stripDirs {
    403 		if strings.HasPrefix(name, dir+"/") {
    404 			if emulateJar {
    405 				if name != jar.MetaDir && name != jar.ManifestFile {
    406 					return true
    407 				}
    408 			} else {
    409 				return true
    410 			}
    411 		}
    412 	}
    413 	for _, pattern := range stripFiles {
    414 		if match, err := filepath.Match(pattern, filepath.Base(name)); err != nil {
    415 			panic(fmt.Errorf("%s: %s", err.Error(), pattern))
    416 		} else if match {
    417 			return true
    418 		}
    419 	}
    420 	return false
    421 }
    422 
    423 func jarSort(files []fileMapping) {
    424 	sort.SliceStable(files, func(i, j int) bool {
    425 		return jar.EntryNamesLess(files[i].dest, files[j].dest)
    426 	})
    427 }
    428 
    429 func alphanumericSort(files []fileMapping) {
    430 	sort.SliceStable(files, func(i, j int) bool {
    431 		return files[i].dest < files[j].dest
    432 	})
    433 }
    434