Home | History | Annotate | Download | only in test
      1 #! /usr/bin/python
      2 
      3 import os
      4 import sys
      5 import string
      6 import re
      7 
      8 ## hash from symbol name to list of symbols with that name,
      9 ## where the list of symbols contains a list representing each symbol
     10 symbols = {}
     11 roots = {}
     12 
     13 def createBacklinks(name, syms):
     14     for s in syms:
     15         refs = s[2]
     16         for r in refs:
     17             ## for each ref, add ourselves as a referencer
     18             if symbols.has_key(r):
     19                 targets = symbols[r]
     20                 for t in targets:
     21                     if name not in t[5]:
     22                         t[5].append(name)
     23 
     24 def markSymbol(frm, name):
     25     if not symbols.has_key(name):
     26         print "%s referenced but was not in the objdump"
     27     syms = symbols[name]
     28     ## print ambiguous references unless they are internal noise like ".L129"
     29     if len(syms) > 1 and name[0] != '.':
     30         print "Reference to symbol '%s' from '%s' is ambiguous, marking all '%s'" % (name, frm, name)
     31         print syms
     32     for s in syms:
     33         if s[4]:
     34             pass ## already marked
     35         else:
     36             s[4] = 1
     37             refs = s[2]
     38             for r in refs:
     39                 markSymbol(s[0], r)
     40 
     41 def cmpFilename(a, b):
     42     v = cmp(a[1], b[1])
     43     if v == 0:
     44         v = cmp(a[0], b[0])
     45     return v
     46 
     47 def sizeAsString(bytes):
     48     if bytes < 1024:
     49         return "%d bytes" % bytes
     50     elif bytes < 1024*1024:
     51         return "%.2gK" % (bytes / 1024.0)
     52     else:
     53         return "%.2gM" % (bytes / 1024.0 / 1024.0)
     54 
     55 def printLost():
     56     list = []
     57     filename = None
     58     for (name, syms) in symbols.items():
     59         s = syms[0] ## we always mark all or none for now
     60         if not s[4] and name[0] != '.': ## skip .L129 type symbols
     61             filename = s[3]
     62             if not filename:
     63                 filename = "unknown file"
     64             list.append ((name, filename, s[5], s[7]))
     65 
     66     file_summaries = []
     67     total_unused = 0
     68     total_this_file = 0
     69     filename = None
     70     list.sort(cmpFilename)
     71     for l in list:
     72         next_filename = l[1]
     73         if next_filename != filename:
     74             if total_this_file > 0:
     75                 file_summaries.append ("  %s may be unused in %s" % (sizeAsString(total_this_file), filename))
     76             print "%s has these symbols not reachable from exported symbols:" % next_filename
     77             filename = next_filename
     78             total_this_file = 0
     79         print "    %s %s" % (l[0], sizeAsString(l[3]))
     80         total_unused = total_unused + l[3]
     81         total_this_file = total_this_file + l[3]
     82         for trace in l[2]:
     83             print "       referenced from %s" % trace
     84 
     85     for fs in file_summaries:
     86         print fs
     87     print "%s total may be unused" % sizeAsString(total_unused)
     88 
     89 def main():
     90 
     91     ## 0001aa44 <_dbus_message_get_network_data>:
     92     sym_re = re.compile ('([0-9a-f]+) <([^>]+)>:')
     93     ## 1aa49:       e8 00 00 00 00          call   1aa4e <_dbus_message_get_network_data+0xa>
     94     ref_re = re.compile (' <([^>]+)> *$')
     95     ## /home/hp/dbus-cvs/dbus/dbus/dbus-message.c:139
     96     file_re = re.compile ('^(\/[^:].*):[0-9]+$')
     97     ## _dbus_message_get_network_data+0xa
     98     funcname_re = re.compile ('([^+]+)\+[0-9a-fx]+')
     99     ## 00005410 T dbus_address_entries_free
    100     dynsym_re = re.compile ('T ([^ \n]+)$')
    101     
    102     filename = sys.argv[1]
    103 
    104     command = """
    105     objdump -D --demangle -l %s
    106     """ % filename
    107 
    108     command = string.strip (command)
    109 
    110     print "Running: %s" % command
    111     
    112     f = os.popen(command)    
    113 
    114     ## first we find which functions reference which other functions
    115     current_sym = None
    116     lines = f.readlines()
    117     for l in lines:
    118         addr = None
    119         name = None
    120         target = None
    121         file = None
    122         
    123         match = sym_re.match(l)
    124         if match:
    125             addr = match.group(1)
    126             name = match.group(2)
    127         else:
    128             match = ref_re.search(l)
    129             if match:
    130                 target = match.group(1)
    131             else:
    132                 match = file_re.match(l)
    133                 if match:
    134                     file = match.group(1)
    135 
    136         if name:
    137             ## 0 symname, 1 address, 2 references, 3 filename, 4 reached, 5 referenced-by 6 backlinked 7 approx size
    138             item = [name, addr, [], None, 0, [], 0, 0]
    139             if symbols.has_key(name):
    140                 symbols[name].append(item)
    141             else:
    142                 symbols[name] = [item]
    143 
    144             if current_sym:
    145                 prev_addr = long(current_sym[1], 16)
    146                 our_addr = long(item[1], 16)
    147                 item[7] = our_addr - prev_addr
    148                 if item[7] < 0:
    149                     print "Computed negative size %d for %s" % (item[7], item[0])
    150                     item[7] = 0
    151                                   
    152             current_sym = item
    153             
    154         elif target and current_sym:
    155             match = funcname_re.match(target)
    156             if match:
    157                 ## dump the "+address"
    158                 target = match.group(1)
    159             if target == current_sym[0]:
    160                 pass ## skip self-references
    161             else:
    162                 current_sym[2].append (target)
    163 
    164         elif file and current_sym:
    165             if file.startswith('/usr/include'):
    166                 ## inlined libc thingy
    167                 pass
    168             elif current_sym[0].startswith('.debug'):
    169                 ## debug info
    170                 pass
    171             elif current_sym[3] and current_sym[3] != file:
    172                 raise Exception ("%s in both %s and %s" % (current_sym[0], current_sym[3], file))
    173             else:
    174                 current_sym[3] = file
    175 
    176     ## now we need to find the roots (exported symbols)
    177     command = "nm -D %s" % filename
    178     print "Running: %s" % command
    179     f = os.popen(command)
    180     lines = f.readlines ()
    181     for l in lines:
    182         match = dynsym_re.search(l)
    183         if match:
    184             name = match.group(1)
    185             if roots.has_key(name):
    186                 raise Exception("symbol %s exported twice?" % name)
    187             else:
    188                 roots[name] = 1
    189 
    190     print "%d symbols exported from this object" % len(roots)
    191 
    192     ## these functions are used only indirectly, so we don't
    193     ## notice they are used. Manually add them as roots...
    194     vtable_roots = ['unix_finalize',
    195                     'unix_handle_watch',
    196                     'unix_disconnect',
    197                     'unix_connection_set',
    198                     'unix_do_iteration',
    199                     'unix_live_messages_changed',
    200                     'unix_get_unix_fd',
    201                     'handle_client_data_cookie_sha1_mech',
    202                     'handle_client_data_external_mech',
    203                     'handle_server_data_cookie_sha1_mech',
    204                     'handle_server_data_external_mech',
    205                     'handle_client_initial_response_cookie_sha1_mech',                  
    206                     'handle_client_initial_response_external_mech',
    207                     'handle_client_shutdown_cookie_sha1_mech',
    208                     'handle_client_shutdown_external_mech',
    209                     'handle_server_shutdown_cookie_sha1_mech',
    210                     'handle_server_shutdown_external_mech'
    211                     ]
    212 
    213     for vr in vtable_roots:
    214         if roots.has_key(vr):
    215             raise Exception("%s is already a root" % vr)
    216         roots[vr] = 1
    217 
    218     for k in roots.keys():
    219         markSymbol("root", k)
    220 
    221     for (k, v) in symbols.items():
    222         createBacklinks(k, v)
    223 
    224     print """
    225 
    226 The symbols mentioned below don't appear to be reachable starting from
    227 the dynamic exports of the library. However, this program is pretty
    228 dumb; a limitation that creates false positives is that it can only
    229 trace 'reachable' through hardcoded function calls, if a function is
    230 called only through a vtable, it won't be marked reachable (and
    231 neither will its children in the call graph).
    232 
    233 Also, the sizes mentioned are more or less completely bogus.
    234 
    235 """
    236     
    237     print "The following are hardcoded in as vtable roots: %s" % vtable_roots
    238     
    239     printLost()
    240         
## Run the analysis only when this file is executed as a script, not when
## it is imported as a module.
if __name__ == "__main__":
    main()
    243