Home | History | Annotate | Download | only in upstream
      1 #!/usr/bin/env python
      2 #
      3 # Copyright (C) 2017 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 """Helps compare openjdk_java_files contents against upstream file contents.
     18 
     19 Outputs a tab-separated table comparing each openjdk_java_files entry
     20 against OpenJDK upstreams. This can help verify updates to later upstreams
     21 or focus attention towards files that may have been missed in a previous
     22 update (http://b/36461944) or are otherwise surprising (http://b/36429512).
     23 
     24  - Identifies each file as identical to, different from or missing from
     25    each upstream; diffs are not produced.
     26  - Optionally, copies all openjdk_java_files from the default upstream
     27    (e.g. OpenJDK8u121-b13) to a new directory, for easy directory comparison
     28    using e.g. kdiff3, which allows inspecting detailed diffs.
     29  - The ANDROID_BUILD_TOP environment variable must be set to point to the
     30    AOSP root directory (parent of libcore).
     31  - Run with -h command line argument to get usage instructions.
     32 
     33 To check out upstreams OpenJDK 7u40, 8u60 and 8u121-b13, run:
     34 
     35 mkdir openjdk
     36 cd openjdk
     37 hg clone http://hg.openjdk.java.net/jdk7u/jdk7u40/ 7u40
     38 (cd !$ ; sh get_source.sh)
     39 hg clone http://hg.openjdk.java.net/jdk8u/jdk8u 8u121-b13
     40 (cd !$ ; hg update -r jdk8u121-b13 && sh get_source.sh)
     41 hg clone http://hg.openjdk.java.net/jdk8u/jdk8u60/ 8u60
     42 (cd !$ ; sh get_source.sh)
     43 
     44 The newly created openjdk directory is then a suitable argument for the
     45 --upstream_root parameter.
     46 """
     47 
     48 import argparse
     49 import csv
     50 import filecmp
     51 import os
     52 import re
     53 import shutil
     54 import sys
     55 
     56 def rel_paths_from_makefile(build_top):
     57     """Returns the list of relative paths to .java files parsed from openjdk_java_files.mk"""
     58     list_file = os.path.join(build_top, "libcore", "openjdk_java_files.mk")
     59 
     60     result = []
     61     with open(list_file, "r") as f:
     62         for line in f:
     63             match = re.match("\s+ojluni/src/main/java/(.+\.java)\s*\\\s*", line)
     64             if match:
     65                 path = match.group(1)
     66                 # convert / to the appropriate separator (e.g. \ on Windows), just in case
     67                 path = os.path.normpath(path)
     68                 result.append(path)
     69     return result
     70 
     71 def ojluni_path(build_top, rel_path):
     72     """The full path of the file at the given rel_path in ojluni"""
     73     return os.path.join(build_top, "libcore", "ojluni", "src", "main", "java", rel_path)
     74 
     75 def upstream_path(upstream_root, upstream, rel_path):
     76     """The full path of the file at the given rel_path in the given upstream"""
     77     source_dirs = [
     78         "jdk/src/share/classes",
     79         "jdk/src/solaris/classes"
     80     ]
     81     for source_dir in source_dirs:
     82         source_dir = os.path.normpath(source_dir)
     83         result = os.path.join(upstream_root, upstream, source_dir, rel_path)
     84         if os.path.exists(result):
     85             return result
     86     return None
     87 
     88 
     89 # For files with N and M lines, respectively, this runs in time
     90 # O(N+M) if the files are identical or O(N*M) if not. This could
     91 # be improved to O(D*(N+M)) for files with at most D lines
     92 # difference by only considering array elements within D cells
     93 # from the diagonal.
     94 def edit_distance_lines(file_a, file_b):
     95     """
     96     Computes the line-based edit distance between two text files, i.e.
     97     the smallest number of line deletions, additions or replacements
     98     that would transform the content of one file into that of the other.
     99     """
    100     if filecmp.cmp(file_a, file_b, shallow=False):
    101         return 0 # files identical
    102     with open(file_a) as f:
    103         lines_a = f.readlines()
    104     with open(file_b) as f:
    105         lines_b = f.readlines()
    106     prev_cost = range(0, len(lines_b) + 1)
    107     for end_a in range(1, len(lines_a) + 1):
    108         # For each valid index i, prev_cost[i] is the edit distance between
    109         # lines_a[:end_a-1] and lines_b[:i].
    110         # We now calculate cur_cost[end_b] as the edit distance between
    111         # line_a[:end_a] and lines_b[:end_b]
    112         cur_cost = [end_a]
    113         for end_b in range(1, len(lines_b) + 1):
    114             c = min(
    115                 cur_cost[-1] + 1, # append line from b
    116                 prev_cost[end_b] + 1, # append line from a
    117                 # match or replace line
    118                 prev_cost[end_b - 1] + (0 if lines_a[end_a - 1] == lines_b[end_b - 1] else 1)
    119                 )
    120             cur_cost.append(c)
    121         prev_cost = cur_cost
    122     return prev_cost[-1]
    123 
    124 def compare_to_upstreams_and_save(out_file, build_top, upstream_root, upstreams, rel_paths, best_only=False):
    125     """
    126     Prints tab-separated values comparing ojluni files vs. each
    127     upstream, for each of the rel_paths, suitable for human
    128     analysis in a spreadsheet.
    129     This includes whether the corresponding upstream file is
    130     missing, identical, or by how many lines it differs, and
    131     a guess as to the correct upstream based on minimal line
    132     difference (ties broken in favor of upstreams that occur
    133     earlier in the list).
    134     """
    135     writer = csv.writer(out_file, delimiter='\t')
    136     writer.writerow(["rel_path", "guessed_upstream"] + upstreams)
    137     for rel_path in rel_paths:
    138         ojluni_file = ojluni_path(build_top, rel_path)
    139         upstream_comparisons = []
    140         best_distance = sys.maxint
    141         guessed_upstream = ""
    142         for upstream in upstreams:
    143             upstream_file = upstream_path(upstream_root, upstream, rel_path)
    144             if upstream_file is None:
    145                 upstream_comparison = "missing"
    146             else:
    147                 edit_distance = edit_distance_lines(upstream_file, ojluni_file)
    148                 if edit_distance == 0:
    149                     upstream_comparison = "identical"
    150                 else:
    151                     upstream_comparison = "different (%d lines)" % (edit_distance)
    152                 if edit_distance < best_distance:
    153                     best_distance = edit_distance
    154                     guessed_upstream = upstream
    155             upstream_comparisons.append(upstream_comparison)
    156         writer.writerow([rel_path, guessed_upstream ] + upstream_comparisons)
    157 
    158 def copy_files(rel_paths, upstream_root, upstream, output_dir):
    159     """Copies files at the given rel_paths from upstream to output_dir"""
    160     for rel_path in rel_paths:
    161         upstream_file = upstream_path(upstream_root, upstream, rel_path)
    162         if upstream_file is not None:
    163             out_file = os.path.join(output_dir, rel_path)
    164             out_dir = os.path.dirname(out_file)
    165             if not os.path.exists(out_dir):
    166                 os.makedirs(out_dir)
    167             shutil.copyfile(upstream_file, out_file)
    168 
    169 def main():
    170     parser = argparse.ArgumentParser(
    171     description="Check openjdk_java_files contents against upstream file contents.")
    172     parser.add_argument("--upstream_root",
    173         help="Path below where upstream sources are checked out. This should be a "
    174             "directory with one child directory for each upstream (select the "
    175             "upstreams to compare against via --upstreams).",
    176         required=True,)
    177     parser.add_argument("--upstreams", 
    178         default="8u121-b13,8u60,7u40",
    179         help="Comma separated list of subdirectory names of --upstream_root that "
    180             "each hold one upstream.")
    181     parser.add_argument("--output_dir",
    182         help="(optional) path where default upstream sources should be copied to; "
    183             "this path must not yet exist and will be created. "
    184             "The default upstream is the one that occurs first in --upstreams.")
    185     parser.add_argument("--build_top",
    186         default=os.environ.get('ANDROID_BUILD_TOP'),
    187         help="Path where Android sources are checked out (defaults to $ANDROID_BUILD_TOP).")
    188     args = parser.parse_args()
    189     if args.output_dir is not None and os.path.exists(args.output_dir):
    190         raise Exception("Output dir already exists: " + args.output_dir)
    191 
    192     upstreams = [upstream.strip() for upstream in args.upstreams.split(',')]
    193     default_upstream = upstreams[0]
    194     for upstream in upstreams:
    195         upstream_path = os.path.join(args.upstream_root, upstream)
    196         if not os.path.exists(upstream_path):
    197             raise Exception("Upstream not found: " + upstream_path)
    198 
    199     rel_paths = rel_paths_from_makefile(args.build_top)
    200 
    201     compare_to_upstreams_and_save(
    202         sys.stdout, args.build_top, args.upstream_root, upstreams, rel_paths)
    203 
    204     if args.output_dir is not None:
    205         copy_files(rel_paths, args.upstream_root, default_upstream, args.output_dir)
    206 
# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
    209