Home | History | Annotate | Download | only in split_link
      1 # Copyright 2013 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """Takes the same arguments as Windows link.exe, and a definition of libraries
      6 to split into subcomponents. Does multiple passes of link.exe invocation to
      7 determine exports between parts and generates .def and import libraries to
      8 cause symbols to be available to other parts."""
      9 
     10 import _winreg
     11 import ctypes
     12 import os
     13 import re
     14 import shutil
     15 import subprocess
     16 import sys
     17 import tempfile
     18 
     19 
     20 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
     21 
     22 
     23 # This can be set to ignore data exports. The resulting DLLs will probably not
     24 # run, but at least they can be generated. The log of data exports will still
     25 # be output.
     26 IGNORE_DATA = 0
     27 
     28 
     29 def Log(message):
     30   print 'split_link:', message
     31 
     32 
     33 def GetFlagsAndInputs(argv):
     34   """Parses the command line intended for link.exe and return the flags and
     35   input files."""
     36   rsp_expanded = []
     37   for arg in argv:
     38     if arg[0] == '@':
     39       with open(arg[1:]) as rsp:
     40         rsp_expanded.extend(rsp.read().splitlines())
     41     else:
     42       rsp_expanded.append(arg)
     43 
     44   # Use CommandLineToArgvW so we match link.exe parsing.
     45   try:
     46     size = ctypes.c_int()
     47     ptr = ctypes.windll.shell32.CommandLineToArgvW(
     48         ctypes.create_unicode_buffer(' '.join(rsp_expanded)),
     49         ctypes.byref(size))
     50     ref = ctypes.c_wchar_p * size.value
     51     raw = ref.from_address(ptr)
     52     args = [arg for arg in raw]
     53   finally:
     54     ctypes.windll.kernel32.LocalFree(ptr)
     55 
     56   inputs = []
     57   flags = []
     58   intermediate_manifest = ''
     59   for arg in args:
     60     lower_arg = arg.lower()
     61     # We'll be replacing these ourselves.
     62     if lower_arg.startswith('/out:'):
     63       continue
     64     if lower_arg.startswith('/manifestfile:'):
     65       intermediate_manifest = arg[arg.index(':')+1:]
     66       continue
     67     if lower_arg.startswith('/pdb:'):
     68       continue
     69     if (not lower_arg.startswith('/') and
     70         lower_arg.endswith(('.obj', '.lib', '.res'))):
     71       inputs.append(arg)
     72     else:
     73       flags.append(arg)
     74 
     75   return flags, inputs, intermediate_manifest
     76 
     77 
     78 def GetRegistryValue(subkey):
     79   try:
     80     val = _winreg.QueryValue(_winreg.HKEY_CURRENT_USER,
     81                              'Software\\Chromium\\' + subkey)
     82     if os.path.exists(val):
     83       return val
     84   except WindowsError:
     85     pass
     86 
     87   raise SystemExit("Couldn't read from registry")
     88 
     89 
     90 def GetOriginalLinkerPath():
     91   return GetRegistryValue('split_link_installed')
     92 
     93 
     94 def GetMtPath():
     95   return GetRegistryValue('split_link_mt_path')
     96 
     97 
     98 def PartFor(input_file, description_parts, description_all):
     99   """Determines which part a given link input should be put into (or all)."""
    100   # Check if it should go in all parts.
    101   input_file = input_file.lower()
    102   if any(re.search(spec, input_file) for spec in description_all):
    103     return -1
    104   # Or pick which particular one it belongs in.
    105   for i, spec_list in enumerate(description_parts):
    106     if any(re.search(spec, input_file) for spec in spec_list):
    107       return i
    108   raise ValueError("couldn't find location for %s" % input_file)
    109 
    110 
    111 def ParseOutExternals(output):
    112   """Given the stdout of link.exe, parses the error messages to retrieve all
    113   symbols that are unresolved."""
    114   result = set()
    115   # Styles of messages for unresolved externals, and a boolean to indicate
    116   # whether the error message emits the symbols with or without a leading
    117   # underscore.
    118   unresolved_regexes = [
    119     (re.compile(r' : error LNK2019: unresolved external symbol ".*" \((.*)\)'
    120                 r' referenced in function'),
    121      False),
    122     (re.compile(r' : error LNK2001: unresolved external symbol ".*" \((.*)\)$'),
    123      False),
    124     (re.compile(r' : error LNK2019: unresolved external symbol (.*)'
    125                 r' referenced in function '),
    126      True),
    127     (re.compile(r' : error LNK2001: unresolved external symbol (.*)$'),
    128      True),
    129   ]
    130   for line in output.splitlines():
    131     line = line.strip()
    132     for regex, strip_leading_underscore in unresolved_regexes:
    133       mo = regex.search(line)
    134       if mo:
    135         if strip_leading_underscore:
    136           result.add(mo.group(1)[1:])
    137         else:
    138           result.add(mo.group(1))
    139         break
    140 
    141   mo = re.search(r'fatal error LNK1120: (\d+) unresolved externals', output)
    142   # Make sure we have the same number that the linker thinks we have.
    143   if mo is None and result:
    144     raise SystemExit(output)
    145   if len(result) != int(mo.group(1)):
    146     print output
    147     print 'Expecting %d, got %d' % (int(mo.group(1)), len(result))
    148   assert len(result) == int(mo.group(1))
    149   return sorted(result)
    150 
    151 
    152 def AsCommandLineArgs(items):
    153   """Intended for output to a response file. Quotes all arguments."""
    154   return '\n'.join('"' + x + '"' for x in items)
    155 
    156 
    157 def OutputNameForIndex(index):
    158   """Gets the final output DLL name, given a zero-based index."""
    159   if index == 0:
    160     return "chrome.dll"
    161   else:
    162     return 'chrome%d.dll' % index
    163 
    164 
    165 def ManifestNameForIndex(index):
    166   return OutputNameForIndex(index) + '.intermediate.manifest'
    167 
    168 
    169 def PdbNameForIndex(index):
    170   return OutputNameForIndex(index) + '.pdb'
    171 
    172 
    173 def RunLinker(flags, index, inputs, phase, intermediate_manifest):
    174   """Invokes the linker and returns the stdout, returncode and target name."""
    175   rspfile = 'part%d_%s.rsp' % (index, phase)
    176   with open(rspfile, 'w') as f:
    177     print >> f, AsCommandLineArgs(inputs)
    178     print >> f, AsCommandLineArgs(flags)
    179     output_name = OutputNameForIndex(index)
    180     manifest_name = ManifestNameForIndex(index)
    181     print >> f, '/ENTRY:ChromeEmptyEntry@12'
    182     print >> f, '/OUT:' + output_name
    183     print >> f, '/MANIFESTFILE:' + manifest_name
    184     print >> f, '/PDB:' + PdbNameForIndex(index)
    185   # Log('[[[\n' + open(rspfile).read() + '\n]]]')
    186   link_exe = GetOriginalLinkerPath()
    187   popen = subprocess.Popen([link_exe, '@' + rspfile], stdout=subprocess.PIPE)
    188   stdout, _ = popen.communicate()
    189   if index == 0 and popen.returncode == 0 and intermediate_manifest:
    190     # Hack for ninja build. After the linker runs, it does some manifest
    191     # things and expects there to be a file in this location. We just put it
    192     # there so it's happy. This is a no-op.
    193     if os.path.isdir(os.path.dirname(intermediate_manifest)):
    194       shutil.copyfile(manifest_name, intermediate_manifest)
    195   return stdout, popen.returncode, output_name
    196 
    197 
    198 def GetLibObjList(lib):
    199   """Gets the list of object files contained in a .lib."""
    200   link_exe = GetOriginalLinkerPath()
    201   popen = subprocess.Popen(
    202       [link_exe, '/lib', '/nologo', '/list', lib], stdout=subprocess.PIPE)
    203   stdout, _ = popen.communicate()
    204   return stdout.splitlines()
    205 
    206 
    207 def ExtractObjFromLib(lib, obj):
    208   """Extracts a .obj file contained in a .lib file. Returns the absolute path
    209   a temp file."""
    210   link_exe = GetOriginalLinkerPath()
    211   temp = tempfile.NamedTemporaryFile(
    212       prefix='split_link_', suffix='.obj', delete=False)
    213   temp.close()
    214   subprocess.check_call([
    215     link_exe, '/lib', '/nologo', '/extract:' + obj, lib, '/out:' + temp.name])
    216   return temp.name
    217 
    218 
    219 def Unmangle(export):
    220   "Returns the human-presentable name of a mangled symbol."""
    221   # Use dbghelp.dll to demangle the name.
    222   # TODO(scottmg): Perhaps a simple cache? Seems pretty fast though.
    223   UnDecorateSymbolName = ctypes.windll.dbghelp.UnDecorateSymbolName
    224   buffer_size = 2048
    225   output_string = ctypes.create_string_buffer(buffer_size)
    226   if not UnDecorateSymbolName(
    227       export, ctypes.byref(output_string), buffer_size, 0):
    228     raise ctypes.WinError()
    229   return output_string.value
    230 
    231 
    232 def IsDataDefinition(export):
    233   """Determines if a given name is data rather than a function. Always returns
    234   False for C-style (as opposed to C++-style names)."""
    235   if export[0] != '?':
    236     return False
    237 
    238   # If it contains a '(' we assume it's a function.
    239   return '(' not in Unmangle(export)
    240 
    241 
    242 def GenerateDefFiles(unresolved_by_part):
    243   """Given a list of unresolved externals, generates a .def file that will
    244   cause all those symbols to be exported."""
    245   deffiles = []
    246   Log('generating .def files')
    247   for i, part in enumerate(unresolved_by_part):
    248     deffile = 'part%d.def' % i
    249     with open(deffile, 'w') as f:
    250       print >> f, 'LIBRARY %s' % OutputNameForIndex(i)
    251       print >> f, 'EXPORTS'
    252       for j, part in enumerate(unresolved_by_part):
    253         if i == j:
    254           continue
    255         is_data = \
    256             [' DATA' if IsDataDefinition(export) and not IGNORE_DATA else ''
    257              for export in part]
    258         print >> f, '\n'.join('  ' + export + data
    259                               for export, data in zip(part, is_data))
    260     deffiles.append(deffile)
    261   return deffiles
    262 
    263 
    264 def BuildImportLibs(flags, inputs_by_part, deffiles):
    265   """Runs the linker to generate an import library."""
    266   import_libs = []
    267   Log('building import libs')
    268   for i, (inputs, deffile) in enumerate(zip(inputs_by_part, deffiles)):
    269     libfile = 'part%d.lib' % i
    270     flags_with_implib_and_deffile = flags + ['/IMPLIB:%s' % libfile,
    271                                              '/DEF:%s' % deffile]
    272     RunLinker(flags_with_implib_and_deffile, i, inputs, 'implib', None)
    273     import_libs.append(libfile)
    274   return import_libs
    275 
    276 
    277 def AttemptLink(flags, inputs_by_part, unresolved_by_part, deffiles,
    278                 import_libs, intermediate_manifest):
    279   """Tries to run the linker for all parts using the current round of
    280   generated import libs and .def files. If the link fails, updates the
    281   unresolved externals list per part."""
    282   dlls = []
    283   all_succeeded = True
    284   new_externals = []
    285   Log('unresolveds now: %r' % [len(part) for part in unresolved_by_part])
    286   for i, (inputs, deffile) in enumerate(zip(inputs_by_part, deffiles)):
    287     Log('running link, part %d' % i)
    288     others_implibs = import_libs[:]
    289     others_implibs.pop(i)
    290     inputs_with_implib = inputs + filter(lambda x: x, others_implibs)
    291     if deffile:
    292       flags = flags + ['/DEF:%s' % deffile, '/LTCG']
    293     stdout, rc, output = RunLinker(
    294         flags, i, inputs_with_implib, 'final', intermediate_manifest)
    295     if rc != 0:
    296       all_succeeded = False
    297       new_externals.append(ParseOutExternals(stdout))
    298     else:
    299       new_externals.append([])
    300       dlls.append(output)
    301   combined_externals = [sorted(set(prev) | set(new))
    302                         for prev, new in zip(unresolved_by_part, new_externals)]
    303   return all_succeeded, dlls, combined_externals
    304 
    305 
    306 def ExtractSubObjsTargetedAtAll(
    307     inputs,
    308     num_parts,
    309     description_parts,
    310     description_all,
    311     description_all_from_libs):
    312   """For (lib, obj) tuples in the all_from_libs section, extract the obj out of
    313   the lib and added it to inputs. Returns a list of lists for which part the
    314   extracted obj belongs in (which is whichever the .lib isn't in)."""
    315   by_parts = [[] for _ in range(num_parts)]
    316   for lib_spec, obj_spec in description_all_from_libs:
    317     for input_file in inputs:
    318       if re.search(lib_spec, input_file):
    319         objs = GetLibObjList(input_file)
    320         match_count = 0
    321         for obj in objs:
    322           if re.search(obj_spec, obj, re.I):
    323             extracted_obj = ExtractObjFromLib(input_file, obj)
    324             #Log('extracted %s (%s %s)' % (extracted_obj, input_file, obj))
    325             i = PartFor(input_file, description_parts, description_all)
    326             if i == -1:
    327               raise SystemExit(
    328                   '%s is already in all parts, but matched '
    329                   '%s in all_from_libs' % (input_file, obj))
    330             # See note in main().
    331             assert num_parts == 2, "Can't handle > 2 dlls currently"
    332             by_parts[1 - i].append(obj)
    333             match_count += 1
    334         if match_count == 0:
    335           raise SystemExit(
    336               '%s, %s matched a lib, but no objs' % (lib_spec, obj_spec))
    337   return by_parts
    338 
    339 
    340 def main():
    341   flags, inputs, intermediate_manifest = GetFlagsAndInputs(sys.argv[1:])
    342   partition_file = os.path.normpath(
    343       os.path.join(BASE_DIR, '../../../build/split_link_partition.py'))
    344   with open(partition_file) as partition:
    345     description = eval(partition.read())
    346   inputs_by_part = []
    347   description_parts = description['parts']
    348   # We currently assume that if a symbol isn't in dll 0, then it's in dll 1
    349   # when generating def files. Otherwise, we'd need to do more complex things
    350   # to figure out where each symbol actually is to assign it to the correct
    351   # .def file.
    352   num_parts = len(description_parts)
    353   assert num_parts == 2, "Can't handle > 2 dlls currently"
    354   description_parts.reverse()
    355   objs_from_libs = ExtractSubObjsTargetedAtAll(
    356       inputs,
    357       num_parts,
    358       description_parts,
    359       description['all'],
    360       description['all_from_libs'])
    361   objs_from_libs.reverse()
    362   inputs_by_part = [[] for _ in range(num_parts)]
    363   for input_file in inputs:
    364     i = PartFor(input_file, description_parts, description['all'])
    365     if i == -1:
    366       for part in inputs_by_part:
    367         part.append(input_file)
    368     else:
    369       inputs_by_part[i].append(input_file)
    370   inputs_by_part.reverse()
    371 
    372   # Put the subobjs on to the main list.
    373   for i, part in enumerate(objs_from_libs):
    374     Log('%d sub .objs added to part %d' % (len(part), i))
    375     inputs_by_part[i].extend(part)
    376 
    377   unresolved_by_part = [[] for _ in range(num_parts)]
    378   import_libs = [None] * num_parts
    379   deffiles = [None] * num_parts
    380 
    381   data_exports = 0
    382   for i in range(5):
    383     Log('--- starting pass %d' % i)
    384     ok, dlls, unresolved_by_part = AttemptLink(
    385         flags, inputs_by_part, unresolved_by_part, deffiles, import_libs,
    386         intermediate_manifest)
    387     if ok:
    388       break
    389     data_exports = 0
    390     for i, part in enumerate(unresolved_by_part):
    391       for export in part:
    392         if IsDataDefinition(export):
    393           print 'part %d contains data export: %s (aka %s)' % (
    394               i, Unmangle(export), export)
    395           data_exports += 1
    396     deffiles = GenerateDefFiles(unresolved_by_part)
    397     import_libs = BuildImportLibs(flags, inputs_by_part, deffiles)
    398   else:
    399     if data_exports and not IGNORE_DATA:
    400       print '%d data exports found, see report above.' % data_exports
    401       print('These cannot be exported, and must be either duplicated to the '
    402             'target DLL (if constant), or wrapped in a function.')
    403     return 1
    404 
    405   mt_exe = GetMtPath()
    406   for i, dll in enumerate(dlls):
    407     Log('embedding manifest in %s' % dll)
    408     args = [mt_exe, '-nologo', '-manifest']
    409     args.append(ManifestNameForIndex(i))
    410     args.append(description['manifest'])
    411     args.append('-outputresource:%s;2' % dll)
    412     subprocess.check_call(args)
    413 
    414   Log('built %r' % dlls)
    415 
    416   return 0
    417 
    418 
    419 if __name__ == '__main__':
    420   sys.exit(main())
    421