#!/usr/bin/env python
#
# Copyright (C) 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Helps compare openjdk_java_files contents against upstream file contents.

Outputs a tab-separated table comparing each openjdk_java_files entry
against OpenJDK upstreams. This can help verify updates to later upstreams
or focus attention towards files that may have been missed in a previous
update (http://b/36461944) or are otherwise surprising (http://b/36429512).

 - Identifies each file as identical to, different from or missing from
   each upstream; diffs are not produced.
 - Optionally, copies all openjdk_java_files from the default upstream
   (e.g. OpenJDK8u121-b13) to a new directory, for easy directory comparison
   using e.g. kdiff3, which allows inspecting detailed diffs.
 - The ANDROID_BUILD_TOP environment variable must be set to point to the
   AOSP root directory (parent of libcore), unless --build_top is given.
 - Run with -h command line argument to get usage instructions.

To check out upstreams OpenJDK 7u40, 8u60 and 8u121-b13, run:

    mkdir openjdk
    cd openjdk
    hg clone http://hg.openjdk.java.net/jdk7u/jdk7u40/ 7u40
    (cd !$ ; sh get_source.sh)
    hg clone http://hg.openjdk.java.net/jdk8u/jdk8u 8u121-b13
    (cd !$ ; hg update -r jdk8u121-b13 && sh get_source.sh)
    hg clone http://hg.openjdk.java.net/jdk8u/jdk8u60/ 8u60
    (cd !$ ; sh get_source.sh)

The newly created openjdk directory is then a suitable argument for the
--upstream_root parameter.
"""

import argparse
import csv
import filecmp
import os
import re
import shutil
import sys

# Matches one indented list entry of openjdk_java_files.mk and captures the
# path relative to ojluni/src/main/java. The trailing line-continuation
# backslash is optional so that the last entry of a list is not dropped.
_JAVA_FILE_LINE = re.compile(
    r"\s+ojluni/src/main/java/(.+\.java)\s*\\?\s*$")

# Directories (relative to an upstream checkout) that are searched, in this
# order, for a given source file.
_UPSTREAM_SOURCE_DIRS = (
    "jdk/src/share/classes",
    "jdk/src/solaris/classes",
)


def rel_paths_from_makefile(build_top):
    """Returns the relative paths of .java files listed in openjdk_java_files.mk.

    Args:
        build_top: path of the AOSP root directory (parent of libcore).

    Returns:
        A list of paths relative to ojluni/src/main/java, in the order in
        which they occur in the makefile, using the platform's separator.
    """
    list_file = os.path.join(build_top, "libcore", "openjdk_java_files.mk")

    result = []
    with open(list_file, "r") as f:
        for line in f:
            match = _JAVA_FILE_LINE.match(line)
            if match:
                # convert / to the appropriate separator (e.g. \ on Windows),
                # just in case
                result.append(os.path.normpath(match.group(1)))
    return result


def ojluni_path(build_top, rel_path):
    """The full path of the file at the given rel_path in ojluni."""
    return os.path.join(
        build_top, "libcore", "ojluni", "src", "main", "java", rel_path)


def upstream_path(upstream_root, upstream, rel_path):
    """The full path of the file at the given rel_path in the given upstream.

    Returns None if the file exists in none of the known upstream source
    directories.
    """
    for source_dir in _UPSTREAM_SOURCE_DIRS:
        candidate = os.path.join(
            upstream_root, upstream, os.path.normpath(source_dir), rel_path)
        if os.path.exists(candidate):
            return candidate
    return None


# For files with N and M lines, respectively, this runs in time
# O(N+M) if the files are identical or O(N*M) if not. This could
# be improved to O(D*(N+M)) for files with at most D lines
# difference by only considering array elements within D cells
# from the diagonal.
def edit_distance_lines(file_a, file_b):
    """Computes the line-based edit distance between two files.

    This is the smallest number of line deletions, additions or
    replacements that would transform the content of one file into that
    of the other.

    Args:
        file_a: path of the first file.
        file_b: path of the second file.

    Returns:
        The edit distance as a non-negative int; 0 iff the files are
        byte-for-byte identical.
    """
    if filecmp.cmp(file_a, file_b, shallow=False):
        return 0  # files identical
    # Read as bytes: this is consistent with the byte-wise filecmp check
    # above and cannot fail on sources that are not valid in the locale's
    # text encoding.
    with open(file_a, "rb") as f:
        lines_a = f.readlines()
    with open(file_b, "rb") as f:
        lines_b = f.readlines()
    prev_cost = list(range(len(lines_b) + 1))
    for end_a, line_a in enumerate(lines_a, 1):
        # For each valid index i, prev_cost[i] is the edit distance between
        # lines_a[:end_a-1] and lines_b[:i].
        # We now calculate cur_cost[end_b] as the edit distance between
        # lines_a[:end_a] and lines_b[:end_b]
        cur_cost = [end_a]
        for end_b, line_b in enumerate(lines_b, 1):
            cur_cost.append(min(
                cur_cost[-1] + 1,  # append line from b
                prev_cost[end_b] + 1,  # append line from a
                # match or replace line
                prev_cost[end_b - 1] + (0 if line_a == line_b else 1),
            ))
        prev_cost = cur_cost
    return prev_cost[-1]


def compare_to_upstreams_and_save(out_file, build_top, upstream_root,
                                  upstreams, rel_paths, best_only=False):
    """Writes a table comparing ojluni files against each upstream.

    Prints tab-separated values comparing ojluni files vs. each
    upstream, for each of the rel_paths, suitable for human
    analysis in a spreadsheet.
    This includes whether the corresponding upstream file is
    missing, identical, or by how many lines it differs, and
    a guess as to the correct upstream based on minimal line
    difference (ties broken in favor of upstreams that occur
    earlier in the list).

    Args:
        out_file: writable text file object that receives the table.
        build_top: path of the AOSP root directory (parent of libcore).
        upstream_root: directory holding one subdirectory per upstream.
        upstreams: list of upstream subdirectory names, in priority order.
        rel_paths: paths relative to ojluni/src/main/java to compare.
        best_only: currently unused; accepted for interface compatibility.
    """
    writer = csv.writer(out_file, delimiter='\t')
    writer.writerow(["rel_path", "guessed_upstream"] + list(upstreams))
    for rel_path in rel_paths:
        ojluni_file = ojluni_path(build_top, rel_path)
        upstream_comparisons = []
        # Infinity compares greater than any int on both Python 2 and 3
        # (sys.maxint no longer exists on Python 3).
        best_distance = float("inf")
        guessed_upstream = ""
        for upstream in upstreams:
            upstream_file = upstream_path(upstream_root, upstream, rel_path)
            if upstream_file is None:
                upstream_comparison = "missing"
            else:
                edit_distance = edit_distance_lines(upstream_file, ojluni_file)
                if edit_distance == 0:
                    upstream_comparison = "identical"
                else:
                    upstream_comparison = (
                        "different (%d lines)" % (edit_distance))
                # Strict comparison keeps the earliest upstream on ties.
                if edit_distance < best_distance:
                    best_distance = edit_distance
                    guessed_upstream = upstream
            upstream_comparisons.append(upstream_comparison)
        writer.writerow([rel_path, guessed_upstream] + upstream_comparisons)


def copy_files(rel_paths, upstream_root, upstream, output_dir):
    """Copies files at the given rel_paths from upstream to output_dir.

    Files that are missing from the upstream are skipped; intermediate
    directories below output_dir are created as needed.
    """
    for rel_path in rel_paths:
        upstream_file = upstream_path(upstream_root, upstream, rel_path)
        if upstream_file is None:
            continue
        out_file = os.path.join(output_dir, rel_path)
        out_dir = os.path.dirname(out_file)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        shutil.copyfile(upstream_file, out_file)


def main():
    """Parses the command line, prints the comparison and optionally copies files."""
    parser = argparse.ArgumentParser(
        description="Check openjdk_java_files contents against upstream file contents.")
    parser.add_argument("--upstream_root",
        help="Path below where upstream sources are checked out. This should be a "
            "directory with one child directory for each upstream (select the "
            "upstreams to compare against via --upstreams).",
        required=True,)
    parser.add_argument("--upstreams",
        default="8u121-b13,8u60,7u40",
        help="Comma separated list of subdirectory names of --upstream_root that "
            "each hold one upstream.")
    parser.add_argument("--output_dir",
        help="(optional) path where default upstream sources should be copied to; "
            "this path must not yet exist and will be created. "
            "The default upstream is the one that occurs first in --upstreams.")
    parser.add_argument("--build_top",
        default=os.environ.get('ANDROID_BUILD_TOP'),
        help="Path where Android sources are checked out (defaults to $ANDROID_BUILD_TOP).")
    args = parser.parse_args()
    if not args.build_top:
        # Without this check, os.path.join(None, ...) fails with an opaque
        # TypeError further down.
        parser.error("--build_top not given and ANDROID_BUILD_TOP is not set")
    if args.output_dir is not None and os.path.exists(args.output_dir):
        raise Exception("Output dir already exists: " + args.output_dir)

    # Ignore empty entries (e.g. from a trailing comma); an empty name would
    # otherwise silently resolve to --upstream_root itself.
    upstreams = [name.strip() for name in args.upstreams.split(',')]
    upstreams = [name for name in upstreams if name]
    if not upstreams:
        parser.error("--upstreams must name at least one upstream")
    default_upstream = upstreams[0]
    for upstream in upstreams:
        upstream_dir = os.path.join(args.upstream_root, upstream)
        if not os.path.exists(upstream_dir):
            raise Exception("Upstream not found: " + upstream_dir)

    rel_paths = rel_paths_from_makefile(args.build_top)

    compare_to_upstreams_and_save(
        sys.stdout, args.build_top, args.upstream_root, upstreams, rel_paths)

    if args.output_dir is not None:
        copy_files(rel_paths, args.upstream_root, default_upstream, args.output_dir)


if __name__ == '__main__':
    main()