Home | History | Annotate | Download | only in mac
      1 #!/bin/bash -p
      2 
      3 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
      4 # Use of this source code is governed by a BSD-style license that can be
      5 # found in the LICENSE file.
      6 
      7 # usage: dirdiffer.sh old_dir new_dir patch_dir
      8 #
      9 # dirdiffer creates a patch directory patch_dir that represents the difference
     10 # between old_dir and new_dir. patch_dir can be used with dirpatcher to
     11 # recreate new_dir given old_dir.
     12 #
     13 # dirdiffer operates recursively, properly handling ordinary files, symbolic
     14 # links, and directories, as they are found in new_dir. Symbolic links and
     15 # directories are always replicated as-is in patch_dir. Ordinary files will
     16 # be represented at the appropriate location in patch_dir by one of the
     17 # following:
     18 #
     19 #  - a binary diff prepared by goobsdiff that can transform the file at the
     20 #    same position in old_dir to the version in new_dir, but only when such a
     21 #    file already exists in old_dir and is an ordinary file. These files are
     22 #    given a "$gbs" suffix.
     23 #  - a bzip2-compressed copy of the new file from new_dir; in patch_dir, the
     24 #    new file will have a "$bz2" suffix.
     25 #  - a gzip-compressed copy of the new file from new_dir; in patch_dir, the
     26 #    new file will have a "$gz" suffix.
     27 #  - an xz/lzma2-compressed copy of the new file from new_dir; in patch_dir,
     28 #    the new file will have an "$xz" suffix.
     29 #  - an uncompressed copy of the new file from new_dir; in patch_dir, the
     30 #    new file will have a "$raw" suffix.
     31 #
     32 # The unconventional suffixes are used because they aren't likely to occur in
     33 # filenames.
     34 #
     35 # Of these options, the smallest possible representation is chosen. Note that
     36 # goobsdiff itself will also compress various sections of a binary diff with
     37 # bzip2, gzip, or xz/lzma2, or leave them uncompressed, according to which is
     38 # smallest. The approach of choosing the smallest possible representation is
     39 # time-consuming but given the choices of compressors results in an overall
     40 # size reduction of about 3%-5% relative to using bzip2 as the only
     41 # compressor; bzip2 is generally more effective for these data sets than gzip,
     42 # and xz/lzma2 more effective than bzip2.
     43 #
     44 # For large input files, goobsdiff is also very time-consuming and
     45 # memory-intensive. The overall "wall clock time" spent preparing a patch_dir
     46 # representing the differences between Google Chrome's 6.0.422.0 and 6.0.427.0
     47 # versioned directories from successive weekly dev channel releases on a
     48 # 2.53GHz dual-core 4GB MacBook Pro is 3 minutes. Reconstructing new_dir with
     49 # dirpatcher is much quicker; in the above configuration, only 10 seconds are
     50 # needed for reconstruction.
     51 #
     52 # After creating a full patch_dir structure, but before returning, dirpatcher
     53 # is invoked to attempt to recreate new_dir in a temporary location given
     54 # old_dir and patch_dir. The recreated new_dir is then compared against the
     55 # original new_dir as a verification step. Should verification fail, dirdiffer
     56 # exits with a nonzero status, and patch_dir should not be used.
     57 #
     58 # Environment variables:
     59 # DIRDIFFER_EXCLUDE
     60 #   When an entry in new_dir matches this regular expression, it will not be
     61 #   included in patch_dir. All prospective paths in new_dir will be matched
     62 #   against this regular expression, including directories. If a directory
     63 #   matches this pattern, dirdiffer will also ignore the directory's contents.
     64 # DIRDIFFER_NO_DIFF
     65 #   When an entry in new_dir matches this regular expression, it will not be
     66 #   represented in patch_dir by a $gbs file prepared by goobsdiff. It will only
#   appear as a $bz2, $gz, $xz, or $raw file. Only files in new_dir, not
#   directories, will be matched against this regular expression.
     69 #
     70 # Exit codes:
     71 #  0  OK
     72 #  1  Unknown failure
     73 #  2  Incorrect number of parameters
     74 #  3  Input directories do not exist or are not directories
     75 #  4  Output directory already exists
     76 #  5  Parent of output directory does not exist or is not a directory
#  6  An input or output directory contains another
     78 #  7  Could not create output directory
     79 #  8  File already exists in output directory
     80 #  9  Found an irregular file (non-directory, file, or symbolic link) in input
     81 # 10  Could not create symbolic link
     82 # 11  File copy failed
     83 # 12  bzip2 compression failed
     84 # 13  gzip compression failed
     85 # 14  xz/lzma2 compression failed
     86 # 15  Patch creation failed
     87 # 16  Verification failed
     88 # 17  Could not set mode (permissions)
     89 # 18  Could not set modification time
     90 # 19  Invalid regular expression (irregular expression?)
     91 
     92 set -eu
     93 
     94 # Environment sanitization. Set a known-safe PATH. Clear environment variables
     95 # that might impact the interpreter's operation. The |bash -p| invocation
     96 # on the #! line takes the bite out of BASH_ENV, ENV, and SHELLOPTS (among
     97 # other features), but clearing them here ensures that they won't impact any
     98 # shell scripts used as utility programs. SHELLOPTS is read-only and can't be
     99 # unset, only unexported.
    100 export PATH="/usr/bin:/bin:/usr/sbin:/sbin"
    101 unset BASH_ENV CDPATH ENV GLOBIGNORE IFS POSIXLY_CORRECT
    102 export -n SHELLOPTS
    103 
    104 shopt -s dotglob nullglob
    105 
    106 # find_tool looks for an executable file named |tool_name|:
    107 #  - in the same directory as this script,
    108 #  - if this script is located in a Chromium source tree, at the expected
    109 #    Release output location in the Mac out directory,
    110 #  - as above, but in the Debug output location
    111 # If found in any of the above locations, the script's path is output.
    112 # Otherwise, this function outputs |tool_name| as a fallback, allowing it to
    113 # be found (or not) by an ordinary ${PATH} search.
    114 find_tool() {
    115   local tool_name="${1}"
    116 
    117   local script_dir
    118   script_dir="$(dirname "${0}")"
    119 
    120   local tool="${script_dir}/${tool_name}"
    121   if [[ -f "${tool}" ]] && [[ -x "${tool}" ]]; then
    122     echo "${tool}"
    123     return
    124   fi
    125 
    126   local script_dir_phys
    127   script_dir_phys="$(cd "${script_dir}" && pwd -P)"
    128   if [[ "${script_dir_phys}" =~ ^(.*)/src/chrome/installer/mac$ ]]; then
    129     tool="${BASH_REMATCH[1]}/src/out/Release/${tool_name}"
    130     if [[ -f "${tool}" ]] && [[ -x "${tool}" ]]; then
    131       echo "${tool}"
    132       return
    133     fi
    134 
    135     tool="${BASH_REMATCH[1]}/src/out/Debug/${tool_name}"
    136     if [[ -f "${tool}" ]] && [[ -x "${tool}" ]]; then
    137       echo "${tool}"
    138       return
    139     fi
    140   fi
    141 
    142   echo "${tool_name}"
    143 }
    144 
    145 ME="$(basename "${0}")"
    146 readonly ME
    147 DIRPATCHER="$(dirname "${0}")/dirpatcher.sh"
    148 readonly DIRPATCHER
    149 GOOBSDIFF="$(find_tool goobsdiff)"
    150 readonly GOOBSDIFF
    151 readonly BZIP2="bzip2"
    152 readonly GZIP="gzip"
    153 XZ="$(find_tool xz)"
    154 readonly XZ
    155 readonly GBS_SUFFIX='$gbs'
    156 readonly BZ2_SUFFIX='$bz2'
    157 readonly GZ_SUFFIX='$gz'
    158 readonly XZ_SUFFIX='$xz'
    159 readonly PLAIN_SUFFIX='$raw'
    160 
    161 # Workaround for http://code.google.com/p/chromium/issues/detail?id=83180#c3
    162 # In bash 4.0, "declare VAR" no longer initializes VAR if not already set.
    163 : ${DIRDIFFER_EXCLUDE:=}
    164 : ${DIRDIFFER_NO_DIFF:=}
    165 
    166 err() {
    167   local error="${1}"
    168 
    169   echo "${ME}: ${error}" >& 2
    170 }
    171 
    172 declare -a g_cleanup g_verify_exclude
    173 cleanup() {
    174   local status=${?}
    175 
    176   trap - EXIT
    177   trap '' HUP INT QUIT TERM
    178 
    179   if [[ ${status} -ge 128 ]]; then
    180     err "Caught signal $((${status} - 128))"
    181   fi
    182 
    183   if [[ "${#g_cleanup[@]}" -gt 0 ]]; then
    184     rm -rf "${g_cleanup[@]}"
    185   fi
    186 
    187   exit ${status}
    188 }
    189 
    190 copy_mode_and_time() {
    191   local new_file="${1}"
    192   local patch_file="${2}"
    193 
    194   local mode
    195   mode="$(stat "-f%OMp%OLp" "${new_file}")"
    196   if ! chmod -h "${mode}" "${patch_file}"; then
    197     exit 17
    198   fi
    199 
    200   if ! [[ -L "${patch_file}" ]]; then
    201     # Symbolic link modification times can't be copied because there's no
    202     # shell tool that provides direct access to lutimes. Instead, the symbolic
    203     # link was created with rsync, which already copied the timestamp with
    204     # lutimes.
    205     if ! touch -r "${new_file}" "${patch_file}"; then
    206       exit 18
    207     fi
    208   fi
    209 }
    210 
    211 file_size() {
    212   local file="${1}"
    213 
    214   stat -f %z "${file}"
    215 }
    216 
    217 make_patch_file() {
    218   local old_file="${1}"
    219   local new_file="${2}"
    220   local patch_file="${3}"
    221 
    222   local uncompressed_file="${patch_file}${PLAIN_SUFFIX}"
    223   if ! cp "${new_file}" "${uncompressed_file}"; then
    224     exit 11
    225   fi
    226   local uncompressed_size
    227   uncompressed_size="$(file_size "${new_file}")"
    228 
    229   local keep_file="${uncompressed_file}"
    230   local keep_size="${uncompressed_size}"
    231 
    232   local bz2_file="${patch_file}${BZ2_SUFFIX}"
    233   if [[ -e "${bz2_file}" ]]; then
    234     err "${bz2_file} already exists"
    235     exit 8
    236   fi
    237   if ! "${BZIP2}" -9c < "${new_file}" > "${bz2_file}"; then
    238     err "couldn't compress ${new_file} to ${bz2_file} with ${BZIP2}"
    239     exit 12
    240   fi
    241   local bz2_size
    242   bz2_size="$(file_size "${bz2_file}")"
    243 
    244   if [[ "${bz2_size}" -ge "${keep_size}" ]]; then
    245     rm -f "${bz2_file}"
    246   else
    247     rm -f "${keep_file}"
    248     keep_file="${bz2_file}"
    249     keep_size="${bz2_size}"
    250   fi
    251 
    252   local gz_file="${patch_file}${GZ_SUFFIX}"
    253   if [[ -e "${gz_file}" ]]; then
    254     err "${gz_file} already exists"
    255     exit 8
    256   fi
    257   if ! "${GZIP}" -9cn < "${new_file}" > "${gz_file}"; then
    258     err "couldn't compress ${new_file} to ${gz_file} with ${GZIP}"
    259     exit 13
    260   fi
    261   local gz_size
    262   gz_size="$(file_size "${gz_file}")"
    263 
    264   if [[ "${gz_size}" -ge "${keep_size}" ]]; then
    265     rm -f "${gz_file}"
    266   else
    267     rm -f "${keep_file}"
    268     keep_file="${gz_file}"
    269     keep_size="${gz_size}"
    270   fi
    271 
    272   local xz_flags=("-c")
    273 
    274   # If the file looks like a Mach-O file, including a universal/fat file, add
    275   # the x86 BCJ filter, which results in slightly better compression of x86
    276   # and x86_64 executables. Mach-O files might contain other architectures,
    277   # but they aren't currently expected in Chrome.
    278   local file_output
    279   file_output="$(file "${new_file}" 2> /dev/null || true)"
    280   if [[ "${file_output}" =~ Mach-O ]]; then
    281     xz_flags+=("--x86")
    282   fi
    283 
    284   # Use an lzma2 encoder. This is equivalent to xz -9 -e, but allows filters
    285   # to precede the compressor.
    286   xz_flags+=("--lzma2=preset=9e")
    287 
    288   local xz_file="${patch_file}${XZ_SUFFIX}"
    289   if [[ -e "${xz_file}" ]]; then
    290     err "${xz_file} already exists"
    291     exit 8
    292   fi
    293   if ! "${XZ}" "${xz_flags[@]}" < "${new_file}" > "${xz_file}"; then
    294     err "couldn't compress ${new_file} to ${xz_file} with ${XZ}"
    295     exit 14
    296   fi
    297   local xz_size
    298   xz_size="$(file_size "${xz_file}")"
    299 
    300   if [[ "${xz_size}" -ge "${keep_size}" ]]; then
    301     rm -f "${xz_file}"
    302   else
    303     rm -f "${keep_file}"
    304     keep_file="${xz_file}"
    305     keep_size="${xz_size}"
    306   fi
    307 
    308   if [[ -f "${old_file}" ]] && ! [[ -L "${old_file}" ]] &&
    309      ! [[ "${new_file}" =~ ${DIRDIFFER_NO_DIFF} ]]; then
    310     local gbs_file="${patch_file}${GBS_SUFFIX}"
    311     if [[ -e "${gbs_file}" ]]; then
    312       err "${gbs_file} already exists"
    313       exit 8
    314     fi
    315     if ! "${GOOBSDIFF}" "${old_file}" "${new_file}" "${gbs_file}"; then
    316       err "couldn't create ${gbs_file} by comparing ${old_file} to ${new_file}"
    317       exit 15
    318     fi
    319     local gbs_size
    320     gbs_size="$(file_size "${gbs_file}")"
    321 
    322     if [[ "${gbs_size}" -ge "${keep_size}" ]]; then
    323       rm -f "${gbs_file}"
    324     else
    325       rm -f "${keep_file}"
    326       keep_file="${gbs_file}"
    327       keep_size="${gbs_size}"
    328     fi
    329   fi
    330 
    331   copy_mode_and_time "${new_file}" "${keep_file}"
    332 }
    333 
    334 make_patch_symlink() {
    335   local new_file="${1}"
    336   local patch_file="${2}"
    337 
    338   # local target
    339   # target="$(readlink "${new_file}")"
    340   # ln -s "${target}" "${patch_file}"
    341 
    342   # Use rsync instead of the above, as it's the only way to preserve the
    343   # timestamp of a symbolic link using shell tools.
    344   if ! rsync -lt "${new_file}" "${patch_file}"; then
    345     exit 10
    346   fi
    347 
    348   copy_mode_and_time "${new_file}" "${patch_file}"
    349 }
    350 
    351 make_patch_dir() {
    352   local old_dir="${1}"
    353   local new_dir="${2}"
    354   local patch_dir="${3}"
    355 
    356   if ! mkdir "${patch_dir}"; then
    357     exit 7
    358   fi
    359 
    360   local new_file
    361   for new_file in "${new_dir}/"*; do
    362     local file="${new_file:${#new_dir} + 1}"
    363     local old_file="${old_dir}/${file}"
    364     local patch_file="${patch_dir}/${file}"
    365 
    366     if [[ "${new_file}" =~ ${DIRDIFFER_EXCLUDE} ]]; then
    367       g_verify_exclude+=("${new_file}")
    368       continue
    369     fi
    370 
    371     if [[ -e "${patch_file}" ]]; then
    372       err "${patch_file} already exists"
    373       exit 8
    374     fi
    375 
    376     if [[ -L "${new_file}" ]]; then
    377       make_patch_symlink "${new_file}" "${patch_file}"
    378     elif [[ -d "${new_file}" ]]; then
    379       make_patch_dir "${old_file}" "${new_file}" "${patch_file}"
    380     elif [[ ! -f "${new_file}" ]]; then
    381       err "can't handle irregular file ${new_file}"
    382       exit 9
    383     else
    384       make_patch_file "${old_file}" "${new_file}" "${patch_file}"
    385     fi
    386   done
    387 
    388   copy_mode_and_time "${new_dir}" "${patch_dir}"
    389 }
    390 
    391 verify_patch_dir() {
    392   local old_dir="${1}"
    393   local new_dir="${2}"
    394   local patch_dir="${3}"
    395 
    396   local verify_temp_dir verify_dir
    397   verify_temp_dir="$(mktemp -d -t "${ME}")"
    398   g_cleanup+=("${verify_temp_dir}")
    399   verify_dir="${verify_temp_dir}/patched"
    400 
    401   if ! "${DIRPATCHER}" "${old_dir}" "${patch_dir}" "${verify_dir}"; then
    402     err "patch application for verification failed"
    403     exit 16
    404   fi
    405 
    406   # rsync will print a line for any file, directory, or symbolic link that
    407   # differs or exists only in one directory. As used here, it correctly
    408   # considers link targets, file contents, permissions, and timestamps.
    409   local rsync_command=(rsync -clprt --delete --out-format=%n \
    410                        "${new_dir}/" "${verify_dir}")
    411   if [[ ${#g_verify_exclude[@]} -gt 0 ]]; then
    412     local exclude
    413     for exclude in "${g_verify_exclude[@]}"; do
    414       # ${g_verify_exclude[@]} contains paths in ${new_dir}. Strip off
    415       # ${new_dir} from the beginning of each, but leave a leading "/" so that
    416       # rsync treats them as being at the root of the "transfer."
    417       rsync_command+=("--exclude" "${exclude:${#new_dir}}")
    418     done
    419   fi
    420 
    421   local rsync_output
    422   if ! rsync_output="$("${rsync_command[@]}")"; then
    423     err "rsync for verification failed"
    424     exit 16
    425   fi
    426 
    427   rm -rf "${verify_temp_dir}"
    428   unset g_cleanup[${#g_cleanup[@]}]
    429 
    430   if [[ -n "${rsync_output}" ]]; then
    431     err "verification failed"
    432     exit 16
    433   fi
    434 }
    435 
    436 # shell_safe_path ensures that |path| is safe to pass to tools as a
    437 # command-line argument. If the first character in |path| is "-", "./" is
    438 # prepended to it. The possibly-modified |path| is output.
    439 shell_safe_path() {
    440   local path="${1}"
    441   if [[ "${path:0:1}" = "-" ]]; then
    442     echo "./${path}"
    443   else
    444     echo "${path}"
    445   fi
    446 }
    447 
    448 dirs_contained() {
    449   local dir1="${1}/"
    450   local dir2="${2}/"
    451 
    452   if [[ "${dir1:0:${#dir2}}" = "${dir2}" ]] ||
    453      [[ "${dir2:0:${#dir1}}" = "${dir1}" ]]; then
    454     return 0
    455   fi
    456 
    457   return 1
    458 }
    459 
    460 usage() {
    461   echo "usage: ${ME} old_dir new_dir patch_dir" >& 2
    462 }
    463 
    464 main() {
    465   local old_dir new_dir patch_dir
    466   old_dir="$(shell_safe_path "${1}")"
    467   new_dir="$(shell_safe_path "${2}")"
    468   patch_dir="$(shell_safe_path "${3}")"
    469 
    470   trap cleanup EXIT HUP INT QUIT TERM
    471 
    472   if ! [[ -d "${old_dir}" ]] || ! [[ -d "${new_dir}" ]]; then
    473     err "old_dir and new_dir must exist and be directories"
    474     usage
    475     exit 3
    476   fi
    477 
    478   if [[ -e "${patch_dir}" ]]; then
    479     err "patch_dir must not exist"
    480     usage
    481     exit 4
    482   fi
    483 
    484   local patch_dir_parent
    485   patch_dir_parent="$(dirname "${patch_dir}")"
    486   if ! [[ -d "${patch_dir_parent}" ]]; then
    487     err "patch_dir parent directory must exist and be a directory"
    488     usage
    489     exit 5
    490   fi
    491 
    492   # The weird conditional structure is because the status of the RE comparison
    493   # needs to be available in ${?} without conflating it with other conditions
    494   # or negating it. Only a status of 2 from the =~ operator indicates an
    495   # invalid regular expression.
    496 
    497   if [[ -n "${DIRDIFFER_EXCLUDE}" ]]; then
    498     if [[ "" =~ ${DIRDIFFER_EXCLUDE} ]]; then
    499       true
    500     elif [[ ${?} -eq 2 ]]; then
    501       err "DIRDIFFER_EXCLUDE contains an invalid regular expression"
    502       exit 19
    503     fi
    504   fi
    505 
    506   if [[ -n "${DIRDIFFER_NO_DIFF}" ]]; then
    507     if [[ "" =~ ${DIRDIFFER_NO_DIFF} ]]; then
    508       true
    509     elif [[ ${?} -eq 2 ]]; then
    510       err "DIRDIFFER_NO_DIFF contains an invalid regular expression"
    511       exit 19
    512     fi
    513   fi
    514 
    515   local old_dir_phys new_dir_phys patch_dir_parent_phys patch_dir_phys
    516   old_dir_phys="$(cd "${old_dir}" && pwd -P)"
    517   new_dir_phys="$(cd "${new_dir}" && pwd -P)"
    518   patch_dir_parent_phys="$(cd "${patch_dir_parent}" && pwd -P)"
    519   patch_dir_phys="${patch_dir_parent_phys}/$(basename "${patch_dir}")"
    520 
    521   if dirs_contained "${old_dir_phys}" "${new_dir_phys}" ||
    522      dirs_contained "${old_dir_phys}" "${patch_dir_phys}" ||
    523      dirs_contained "${new_dir_phys}" "${patch_dir_phys}"; then
    524     err "directories must not contain one another"
    525     usage
    526     exit 6
    527   fi
    528 
    529   g_cleanup[${#g_cleanup[@]}]="${patch_dir}"
    530 
    531   make_patch_dir "${old_dir}" "${new_dir}" "${patch_dir}"
    532 
    533   verify_patch_dir "${old_dir}" "${new_dir}" "${patch_dir}"
    534 
    535   unset g_cleanup[${#g_cleanup[@]}]
    536   trap - EXIT
    537 }
    538 
    539 if [[ ${#} -ne 3 ]]; then
    540   usage
    541   exit 2
    542 fi
    543 
    544 main "${@}"
    545 exit ${?}
    546