Home | History | Annotate | Download | only in c-globals
      1 
      2 from collections import namedtuple
      3 import glob
      4 import os.path
      5 import re
      6 import shutil
      7 import sys
      8 import subprocess
      9 
     10 
# Default verbosity level; main() only prints its report when the effective
# verbosity is >= 2, and parse_args() adjusts it with -v / -q.
VERBOSITY = 2

# Directory layout, derived from the location of this script:
# the directory holding this file, its parent, and its grandparent.
C_GLOBALS_DIR = os.path.abspath(os.path.dirname(__file__))
TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR)
ROOT_DIR = os.path.dirname(TOOLS_DIR)
# Default file of global names read by _read_global_names().
GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt')

# Sub-directories scanned by find_capi_vars() for .h/.c files.
SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python']

# Matches a single-line PyAPI_DATA(...) declaration; the one capture group
# holds the declared name(s), separated by ", " when there are several.
CAPI_REGEX = re.compile(r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$')
     21 
     22 
# Symbol names that find_vars() drops from its results.
# NOTE(review): these look like toolchain/linker-provided symbols rather
# than CPython variables -- confirm before extending the list.
IGNORED_VARS = {
        '_DYNAMIC',
        '_GLOBAL_OFFSET_TABLE_',
        '__JCR_LIST__',
        '__JCR_END__',
        '__TMC_END__',
        '__bss_start',
        '__data_start',
        '__dso_handle',
        '_edata',
        '_end',
        }
     35 
     36 
     37 def find_capi_vars(root):
     38     capi_vars = {}
     39     for dirname in SOURCE_DIRS:
     40         for filename in glob.glob(os.path.join(ROOT_DIR, dirname, '**/*.[hc]'),
     41                                   recursive=True):
     42             with open(filename) as file:
     43                 for name in _find_capi_vars(file):
     44                     if name in capi_vars:
     45                         assert not filename.endswith('.c')
     46                         assert capi_vars[name].endswith('.c')
     47                     capi_vars[name] = filename
     48     return capi_vars
     49 
     50 
     51 def _find_capi_vars(lines):
     52     for line in lines:
     53         if not line.startswith('PyAPI_DATA'):
     54             continue
     55         assert '{' not in line
     56         match = CAPI_REGEX.match(line)
     57         assert match
     58         names, = match.groups()
     59         for name in names.split(', '):
     60             yield name
     61 
     62 
     63 def _read_global_names(filename):
     64     # These variables are shared between all interpreters in the process.
     65     with open(filename) as file:
     66         return {line.partition('#')[0].strip()
     67                 for line in file
     68                 if line.strip() and not line.startswith('#')}
     69 
     70 
     71 def _is_global_var(name, globalnames):
     72     if _is_autogen_var(name):
     73         return True
     74     if _is_type_var(name):
     75         return True
     76     if _is_module(name):
     77         return True
     78     if _is_exception(name):
     79         return True
     80     if _is_compiler(name):
     81         return True
     82     return name in globalnames
     83 
     84 
     85 def _is_autogen_var(name):
     86     return (
     87         name.startswith('PyId_') or
     88         '.' in name or
     89         # Objects/typeobject.c
     90         name.startswith('op_id.') or
     91         name.startswith('rop_id.') or
     92         # Python/graminit.c
     93         name.startswith('arcs_') or
     94         name.startswith('states_')
     95         )
     96 
     97 
     98 def _is_type_var(name):
     99     if name.endswith(('Type', '_Type', '_type')):  # XXX Always a static type?
    100         return True
    101     if name.endswith('_desc'):  # for structseq types
    102         return True
    103     return (
    104         name.startswith('doc_') or
    105         name.endswith(('_doc', '__doc__', '_docstring')) or
    106         name.endswith('_methods') or
    107         name.endswith('_fields') or
    108         name.endswith(('_memberlist', '_members')) or
    109         name.endswith('_slots') or
    110         name.endswith(('_getset', '_getsets', '_getsetlist')) or
    111         name.endswith('_as_mapping') or
    112         name.endswith('_as_number') or
    113         name.endswith('_as_sequence') or
    114         name.endswith('_as_buffer') or
    115         name.endswith('_as_async')
    116         )
    117 
    118 
    119 def _is_module(name):
    120     if name.endswith(('_functions', 'Methods', '_Methods')):
    121         return True
    122     if name == 'module_def':
    123         return True
    124     if name == 'initialized':
    125         return True
    126     return name.endswith(('module', '_Module'))
    127 
    128 
    129 def _is_exception(name):
    130     # Other vars are enumerated in globals-core.txt.
    131     if not name.startswith(('PyExc_', '_PyExc_')):
    132         return False
    133     return name.endswith(('Error', 'Warning'))
    134 
    135 
    136 def _is_compiler(name):
    137     return (
    138         # Python/Python-ast.c
    139         name.endswith('_type') or
    140         name.endswith('_singleton') or
    141         name.endswith('_attributes')
    142         )
    143 
    144 
    145 class Var(namedtuple('Var', 'name kind scope capi filename')):
    146 
    147     @classmethod
    148     def parse_nm(cls, line, expected, ignored, capi_vars, globalnames):
    149         _, _, line = line.partition(' ')  # strip off the address
    150         line = line.strip()
    151         kind, _, line = line.partition(' ')
    152         if kind in ignored or ():
    153             return None
    154         elif kind not in expected or ():
    155             raise RuntimeError('unsupported NM type {!r}'.format(kind))
    156 
    157         name, _, filename = line.partition('\t')
    158         name = name.strip()
    159         if _is_autogen_var(name):
    160             return None
    161         if _is_global_var(name, globalnames):
    162             scope = 'global'
    163         else:
    164             scope = None
    165         capi = (name in capi_vars or ())
    166         if filename:
    167             filename = os.path.relpath(filename.partition(':')[0])
    168         return cls(name, kind, scope, capi, filename or '~???~')
    169 
    170     @property
    171     def external(self):
    172         return self.kind.isupper()
    173 
    174 
    175 def find_vars(root, globals_filename=GLOBALS_FILE):
    176     python = os.path.join(root, 'python')
    177     if not os.path.exists(python):
    178         raise RuntimeError('python binary missing (need to build it first?)')
    179     capi_vars = find_capi_vars(root)
    180     globalnames = _read_global_names(globals_filename)
    181 
    182     nm = shutil.which('nm')
    183     if nm is None:
    184         # XXX Use dumpbin.exe /SYMBOLS on Windows.
    185         raise NotImplementedError
    186     else:
    187         yield from (var
    188                     for var in _find_var_symbols(python, nm, capi_vars,
    189                                                  globalnames)
    190                     if var.name not in IGNORED_VARS)
    191 
    192 
# Single-letter symbol types as printed by nm, grouped by how
# Var.parse_nm() treats them.  (Letter meanings are defined by nm(1);
# an upper-case letter is what Var.external reports as external.)
NM_FUNCS = set('Tt')
NM_PUBLIC_VARS = set('BD')
NM_PRIVATE_VARS = set('bd')
# The types that are reported as variables.
NM_VARS = NM_PUBLIC_VARS | NM_PRIVATE_VARS
NM_DATA = set('Rr')
NM_OTHER = set('ACGgiINpSsuUVvWw-?')
# The types that are silently skipped.
NM_IGNORED = NM_FUNCS | NM_DATA | NM_OTHER
    200 
    201 
    202 def _find_var_symbols(python, nm, capi_vars, globalnames):
    203     args = [nm,
    204             '--line-numbers',
    205             python]
    206     out = subprocess.check_output(args)
    207     for line in out.decode('utf-8').splitlines():
    208         var = Var.parse_nm(line, NM_VARS, NM_IGNORED, capi_vars, globalnames)
    209         if var is None:
    210             continue
    211         yield var
    212 
    213 
    214 #######################################
    215 
    216 class Filter(namedtuple('Filter', 'name op value action')):
    217 
    218     @classmethod
    219     def parse(cls, raw):
    220         action = '+'
    221         if raw.startswith(('+', '-')):
    222             action = raw[0]
    223             raw = raw[1:]
    224         # XXX Support < and >?
    225         name, op, value = raw.partition('=')
    226         return cls(name, op, value, action)
    227 
    228     def check(self, var):
    229         value = getattr(var, self.name, None)
    230         if not self.op:
    231             matched = bool(value)
    232         elif self.op == '=':
    233             matched = (value == self.value)
    234         else:
    235             raise NotImplementedError
    236 
    237         if self.action == '+':
    238             return matched
    239         elif self.action == '-':
    240             return not matched
    241         else:
    242             raise NotImplementedError
    243 
    244 
    245 def filter_var(var, filters):
    246     for filter in filters:
    247         if not filter.check(var):
    248             return False
    249     return True
    250 
    251 
    252 def make_sort_key(spec):
    253     columns = [(col.strip('_'), '_' if col.startswith('_') else '')
    254                for col in spec]
    255     def sort_key(var):
    256         return tuple(getattr(var, col).lstrip(prefix)
    257                      for col, prefix in columns)
    258     return sort_key
    259 
    260 
    261 def make_groups(allvars, spec):
    262     group = spec
    263     groups = {}
    264     for var in allvars:
    265         value = getattr(var, group)
    266         key = '{}: {}'.format(group, value)
    267         try:
    268             groupvars = groups[key]
    269         except KeyError:
    270             groupvars = groups[key] = []
    271         groupvars.append(var)
    272     return groups
    273 
    274 
    275 def format_groups(groups, columns, fmts, widths):
    276     for group in sorted(groups):
    277         groupvars = groups[group]
    278         yield '', 0
    279         yield '  # {}'.format(group), 0
    280         yield from format_vars(groupvars, columns, fmts, widths)
    281 
    282 
    283 def format_vars(allvars, columns, fmts, widths):
    284     fmt = ' '.join(fmts[col] for col in columns)
    285     fmt = ' ' + fmt.replace(' ', '   ') + ' '  # for div margin
    286     header = fmt.replace(':', ':^').format(*(col.upper() for col in columns))
    287     yield header, 0
    288     div = ' '.join('-'*(widths[col]+2) for col in columns)
    289     yield div, 0
    290     for var in allvars:
    291         values = (getattr(var, col) for col in columns)
    292         row = fmt.format(*('X' if val is True else val or ''
    293                            for val in values))
    294         yield row, 1
    295     yield div, 0
    296 
    297 
    298 #######################################
    299 
# The supported column names, as a comma-separated string and as a list.
COLUMNS = 'name,external,capi,scope,filename'
COLUMN_NAMES = COLUMNS.split(',')

# Each column is as wide as its own name unless overridden below.
COLUMN_WIDTHS = {col: len(col)
                 for col in COLUMN_NAMES}
COLUMN_WIDTHS.update({
        'name': 50,
        'scope': 7,
        'filename': 40,
        })
# A str.format() field spec per column, e.g. '{:50}'.
COLUMN_FORMATS = {col: '{:%s}' % width
                  for col, width in COLUMN_WIDTHS.items()}
# Columns that kept their default width (name length) are centered.
for col in COLUMN_FORMATS:
    if COLUMN_WIDTHS[col] == len(col):
        COLUMN_FORMATS[col] = COLUMN_FORMATS[col].replace(':', ':^')
    315 
    316 
    317 def _parse_filters_arg(raw, error):
    318     filters = []
    319     for value in raw.split(','):
    320         value=value.strip()
    321         if not value:
    322             continue
    323         try:
    324             filter = Filter.parse(value)
    325             if filter.name not in COLUMN_NAMES:
    326                 raise Exception('unsupported column {!r}'.format(filter.name))
    327         except Exception as e:
    328             error('bad filter {!r}: {}'.format(raw, e))
    329         filters.append(filter)
    330     return filters
    331 
    332 
    333 def _parse_columns_arg(raw, error):
    334     columns = raw.split(',')
    335     for column in columns:
    336         if column not in COLUMN_NAMES:
    337             error('unsupported column {!r}'.format(column))
    338     return columns
    339 
    340 
    341 def _parse_sort_arg(raw, error):
    342     sort = raw.split(',')
    343     for column in sort:
    344         if column.lstrip('_') not in COLUMN_NAMES:
    345             error('unsupported column {!r}'.format(column))
    346     return sort
    347 
    348 
    349 def _parse_group_arg(raw, error):
    350     if not raw:
    351         return raw
    352     group = raw
    353     if group not in COLUMN_NAMES:
    354         error('unsupported column {!r}'.format(group))
    355     if group != 'filename':
    356         error('unsupported group {!r}'.format(group))
    357     return group
    358 
    359 
def parse_args(argv=None):
    """Parse the command line and return the resulting namespace.

    *argv* defaults to ``sys.argv[1:]``.  The returned namespace holds
    ``verbosity``, ``filters``, ``columns``, ``sort``, ``group``, ``rc`` and
    ``filename``; the script passes these to main() as keyword arguments.
    Invalid values are reported through ``parser.error``.
    """
    if argv is None:
        argv = sys.argv[1:]

    import argparse
    parser = argparse.ArgumentParser()

    parser.add_argument('-v', '--verbose', action='count', default=0)
    parser.add_argument('-q', '--quiet', action='count', default=0)

    # NOTE(review): the help text says GLOB, but Filter.check() compares
    # the value with ==, not with a glob match.
    parser.add_argument('--filters', default='-scope',
                        help='[[-]<COLUMN>[=<GLOB>]] ...')

    parser.add_argument('--columns', default=COLUMNS,
                        help='a comma-separated list of columns to show')
    parser.add_argument('--sort', default='filename,_name',
                        help='a comma-separated list of columns to sort')
    parser.add_argument('--group',
                        help='group by the given column name (- to not group)')

    parser.add_argument('--rc-on-match', dest='rc', type=int)

    parser.add_argument('filename', nargs='?', default=GLOBALS_FILE)

    args = parser.parse_args(argv)

    # Fold -v/-q into a single non-negative verbosity level and drop the
    # raw counters from the namespace.
    verbose = vars(args).pop('verbose', 0)
    quiet = vars(args).pop('quiet', 0)
    args.verbosity = max(0, VERBOSITY + verbose - quiet)

    # Sorting by filename first implies grouping by filename unless a
    # --group value was given.
    if args.sort.startswith('filename') and not args.group:
        args.group = 'filename'

    # Default exit code, decided from the *raw* filter string (this must
    # run before args.filters is replaced by parsed Filter objects): 1 only
    # if the string mentions 'core' without containing '-scope=core'.
    if args.rc is None:
        if '-scope=core' in args.filters or 'core' not in args.filters:
            args.rc = 0
        else:
            args.rc = 1

    # Replace the raw strings with their validated, parsed forms.
    args.filters = _parse_filters_arg(args.filters, parser.error)
    args.columns = _parse_columns_arg(args.columns, parser.error)
    args.sort = _parse_sort_arg(args.sort, parser.error)
    args.group = _parse_group_arg(args.group, parser.error)

    return args
    405 
    406 
    407 def main(root=ROOT_DIR, filename=GLOBALS_FILE,
    408          filters=None, columns=COLUMN_NAMES, sort=None, group=None,
    409          verbosity=VERBOSITY, rc=1):
    410 
    411     log = lambda msg: ...
    412     if verbosity >= 2:
    413         log = lambda msg: print(msg)
    414 
    415     allvars = (var
    416                for var in find_vars(root, filename)
    417                if filter_var(var, filters))
    418     if sort:
    419         allvars = sorted(allvars, key=make_sort_key(sort))
    420 
    421     if group:
    422         try:
    423             columns.remove(group)
    424         except ValueError:
    425             pass
    426         grouped = make_groups(allvars, group)
    427         lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
    428     else:
    429         lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
    430 
    431     total = 0
    432     for line, count in lines:
    433         total += count
    434         log(line)
    435     log('\ntotal: {}'.format(total))
    436 
    437     if total and rc:
    438         print('ERROR: found unsafe globals', file=sys.stderr)
    439         return rc
    440     return 0
    441 
    442 
# Script entry point: the parsed arguments are passed to main() as keyword
# arguments and its return value becomes the process exit status.
if __name__ == '__main__':
    args = parse_args()
    sys.exit(
            main(**vars(args)))
    447