"""Report global variables found in a built ``python`` binary.

The binary's symbols are listed with ``nm``; each variable symbol is
classified (scope, C-API membership, source file), filtered, and printed
as a table.  ``main()`` returns a non-zero code when rows remain and a
non-zero ``rc`` was requested.
"""

from collections import namedtuple
import glob
import os.path
import re
import shutil
import sys
import subprocess


VERBOSITY = 2

C_GLOBALS_DIR = os.path.abspath(os.path.dirname(__file__))
TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR)
ROOT_DIR = os.path.dirname(TOOLS_DIR)
GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt')

SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python']

CAPI_REGEX = re.compile(r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$')


# Symbol names that find_vars() never reports.
IGNORED_VARS = {
    '_DYNAMIC',
    '_GLOBAL_OFFSET_TABLE_',
    '__JCR_LIST__',
    '__JCR_END__',
    '__TMC_END__',
    '__bss_start',
    '__data_start',
    '__dso_handle',
    '_edata',
    '_end',
}


def find_capi_vars(root):
    """Map each PyAPI_DATA variable name to the file that declares it.

    Every ``*.h``/``*.c`` file under the SOURCE_DIRS of *root* is scanned
    (recursively).  When a name is seen more than once, the earlier hit
    must be a ``.c`` file and the later one must not be; the later one
    wins.
    """
    capi_vars = {}
    for dirname in SOURCE_DIRS:
        # Scan below the requested root, not the module-level ROOT_DIR.
        pattern = os.path.join(root, dirname, '**/*.[hc]')
        for filename in glob.glob(pattern, recursive=True):
            with open(filename) as file:
                for name in _find_capi_vars(file):
                    if name in capi_vars:
                        assert not filename.endswith('.c')
                        assert capi_vars[name].endswith('.c')
                    capi_vars[name] = filename
    return capi_vars


def _find_capi_vars(lines):
    """Yield the variable names declared by PyAPI_DATA lines in *lines*.

    Only lines that start with ``PyAPI_DATA`` are considered; each must
    match CAPI_REGEX and may declare several comma-separated names.
    """
    for line in lines:
        if not line.startswith('PyAPI_DATA'):
            continue
        assert '{' not in line
        match = CAPI_REGEX.match(line)
        assert match
        names, = match.groups()
        yield from names.split(', ')


def _read_global_names(filename):
    """Return the set of names listed in *filename*.

    Everything from ``#`` to the end of a line is a comment.  Lines that
    are blank once the comment is removed contribute nothing (this also
    covers indented comment lines, which used to add an empty name).
    """
    # These variables are shared between all interpreters in the process.
    names = set()
    with open(filename) as file:
        for line in file:
            name = line.partition('#')[0].strip()
            if name:
                names.add(name)
    return names


def _is_global_var(name, globalnames):
    """Return True if *name* matches a known pattern or is in *globalnames*."""
    if _is_autogen_var(name):
        return True
    if _is_type_var(name):
        return True
    if _is_module(name):
        return True
    if _is_exception(name):
        return True
    if _is_compiler(name):
        return True
    return name in globalnames


def _is_autogen_var(name):
    """Return True for names matching the auto-generated patterns below."""
    return (
        name.startswith('PyId_') or
        '.' in name or
        # Objects/typeobject.c
        name.startswith(('op_id.', 'rop_id.')) or
        # Python/graminit.c
        name.startswith(('arcs_', 'states_'))
    )


def _is_type_var(name):
    """Return True for names that look like part of a type definition."""
    if name.endswith(('Type', '_Type', '_type')):  # XXX Always a static type?
        return True
    if name.endswith('_desc'):  # for structseq types
        return True
    return (
        name.startswith('doc_') or
        name.endswith(('_doc', '__doc__', '_docstring')) or
        name.endswith('_methods') or
        name.endswith('_fields') or
        name.endswith(('_memberlist', '_members')) or
        name.endswith('_slots') or
        name.endswith(('_getset', '_getsets', '_getsetlist')) or
        name.endswith('_as_mapping') or
        name.endswith('_as_number') or
        name.endswith('_as_sequence') or
        name.endswith('_as_buffer') or
        name.endswith('_as_async')
    )


def _is_module(name):
    """Return True for names that look like part of a module definition."""
    if name.endswith(('_functions', 'Methods', '_Methods')):
        return True
    if name == 'module_def':
        return True
    if name == 'initialized':
        return True
    return name.endswith(('module', '_Module'))


def _is_exception(name):
    """Return True for PyExc_*/_PyExc_* names ending in Error or Warning."""
    # Other vars are enumerated in globals-core.txt.
    if not name.startswith(('PyExc_', '_PyExc_')):
        return False
    return name.endswith(('Error', 'Warning'))


def _is_compiler(name):
    """Return True for names matching the Python-ast.c suffixes below."""
    return (
        # Python/Python-ast.c
        name.endswith('_type') or
        name.endswith('_singleton') or
        name.endswith('_attributes')
    )


class Var(namedtuple('Var', 'name kind scope capi filename')):
    """One variable symbol reported by nm.

    ``kind`` is the nm symbol-type letter, ``scope`` is ``'global'`` or
    None, ``capi`` tells whether the name was found by find_capi_vars(),
    and ``filename`` is the source file (``'~???~'`` when nm gave none).
    """

    @classmethod
    def parse_nm(cls, line, expected, ignored, capi_vars, globalnames):
        """Parse one line of ``nm --line-numbers`` output.

        Returns None for symbols whose kind is in *ignored* and for
        auto-generated names.  Raises RuntimeError for a kind that is
        neither ignored nor in *expected*.  *expected*, *ignored* and
        *capi_vars* may each be None, meaning "empty".
        """
        _, _, line = line.partition(' ')  # strip off the address
        line = line.strip()
        kind, _, line = line.partition(' ')
        # Parenthesized so that the "or ()" fallback applies to the
        # container rather than to the result of the membership test.
        if kind in (ignored or ()):
            return None
        elif kind not in (expected or ()):
            raise RuntimeError('unsupported NM type {!r}'.format(kind))

        name, _, filename = line.partition('\t')
        name = name.strip()
        if _is_autogen_var(name):
            return None
        if _is_global_var(name, globalnames):
            scope = 'global'
        else:
            scope = None
        capi = name in (capi_vars or ())
        if filename:
            # nm reports "<path>:<lineno>"; keep only the path.
            filename = os.path.relpath(filename.partition(':')[0])
        return cls(name, kind, scope, capi, filename or '~???~')

    @property
    def external(self):
        """True when the nm kind letter is upper case."""
        return self.kind.isupper()


def find_vars(root, globals_filename=GLOBALS_FILE):
    """Yield a Var for each variable symbol in ``<root>/python``.

    Symbols named in IGNORED_VARS are skipped.  Raises RuntimeError if
    the binary does not exist and NotImplementedError if ``nm`` cannot
    be found on the PATH.
    """
    python = os.path.join(root, 'python')
    if not os.path.exists(python):
        raise RuntimeError('python binary missing (need to build it first?)')
    capi_vars = find_capi_vars(root)
    globalnames = _read_global_names(globals_filename)

    nm = shutil.which('nm')
    if nm is None:
        # XXX Use dumpbin.exe /SYMBOLS on Windows.
        raise NotImplementedError
    else:
        yield from (var
                    for var in _find_var_symbols(python, nm, capi_vars,
                                                 globalnames)
                    if var.name not in IGNORED_VARS)


# nm symbol-type letters, grouped by how parse_nm() treats them.
NM_FUNCS = set('Tt')
NM_PUBLIC_VARS = set('BD')
NM_PRIVATE_VARS = set('bd')
NM_VARS = NM_PUBLIC_VARS | NM_PRIVATE_VARS
NM_DATA = set('Rr')
NM_OTHER = set('ACGgiINpSsuUVvWw-?')
NM_IGNORED = NM_FUNCS | NM_DATA | NM_OTHER


def _find_var_symbols(python, nm, capi_vars, globalnames):
    """Run *nm* on the *python* binary and yield the parsed Var objects."""
    args = [nm,
            '--line-numbers',
            python]
    out = subprocess.check_output(args)
    for line in out.decode('utf-8').splitlines():
        var = Var.parse_nm(line, NM_VARS, NM_IGNORED, capi_vars, globalnames)
        if var is None:
            continue
        yield var


#######################################

class Filter(namedtuple('Filter', 'name op value action')):
    """A single ``[+|-]<column>[=<value>]`` row filter."""

    @classmethod
    def parse(cls, raw):
        """Build a Filter from its textual form.

        A leading ``+`` (the default) keeps matching rows; a leading
        ``-`` keeps the rows that do not match.
        """
        action = '+'
        if raw.startswith(('+', '-')):
            action = raw[0]
            raw = raw[1:]
        # XXX Support < and >?
        name, op, value = raw.partition('=')
        return cls(name, op, value, action)

    def check(self, var):
        """Return True if *var* passes this filter.

        Without an operator the column value is tested for truthiness;
        with ``=`` it is compared to the filter value.  A missing
        attribute counts as None.
        """
        value = getattr(var, self.name, None)
        if not self.op:
            matched = bool(value)
        elif self.op == '=':
            matched = (value == self.value)
        else:
            raise NotImplementedError

        if self.action == '+':
            return matched
        elif self.action == '-':
            return not matched
        else:
            raise NotImplementedError


def filter_var(var, filters):
    """Return True if *var* passes every filter (None means no filters)."""
    return all(flt.check(var) for flt in filters or ())


def make_sort_key(spec):
    """Return a key function that sorts Vars by the columns in *spec*.

    A column written with a leading underscore (e.g. ``_name``) is
    compared with leading underscores stripped from its value.  None is
    sorted as the empty string and non-string values (e.g. booleans) are
    compared as they are.
    """
    columns = [(col.strip('_'), '_' if col.startswith('_') else '')
               for col in spec]

    def sort_key(var):
        key = []
        for col, prefix in columns:
            value = getattr(var, col)
            if value is None:
                # Keep None comparable with the strings in that column.
                value = ''
            if isinstance(value, str):
                value = value.lstrip(prefix)
            key.append(value)
        return tuple(key)
    return sort_key


def make_groups(allvars, spec):
    """Group *allvars* by the column *spec*.

    Returns a dict that maps ``'<column>: <value>'`` to the list of
    matching Vars, in their original order.
    """
    group = spec
    groups = {}
    for var in allvars:
        key = '{}: {}'.format(group, getattr(var, group))
        groups.setdefault(key, []).append(var)
    return groups


def format_groups(groups, columns, fmts, widths):
    """Yield ``(text, count)`` pairs for each group, in sorted key order.

    Each group gets a blank line, a ``# <group>`` heading, and then the
    table produced by format_vars().
    """
    for group in sorted(groups):
        groupvars = groups[group]
        yield '', 0
        yield ' # {}'.format(group), 0
        yield from format_vars(groupvars, columns, fmts, widths)


def format_vars(allvars, columns, fmts, widths):
    """Yield ``(text, count)`` pairs that render *allvars* as a table.

    ``count`` is 1 for a data row and 0 for header and divider lines, so
    summing it gives the number of variables shown.  True is rendered as
    ``X`` and any other falsy value as an empty cell.
    """
    fmt = ' '.join(fmts[col] for col in columns)
    # Each divider segment is width+2 dashes, so cells need a one-space
    # margin on both sides: three spaces between cells, one at each end.
    fmt = ' ' + fmt.replace(' ', '   ') + ' '  # for div margin
    header = fmt.replace(':', ':^').format(*(col.upper() for col in columns))
    yield header, 0
    div = ' '.join('-'*(widths[col]+2) for col in columns)
    yield div, 0
    for var in allvars:
        values = (getattr(var, col) for col in columns)
        row = fmt.format(*('X' if val is True else val or ''
                           for val in values))
        yield row, 1
    yield div, 0


#######################################

COLUMNS = 'name,external,capi,scope,filename'
COLUMN_NAMES = COLUMNS.split(',')

COLUMN_WIDTHS = {col: len(col)
                 for col in COLUMN_NAMES}
COLUMN_WIDTHS.update({
    'name': 50,
    'scope': 7,
    'filename': 40,
})
COLUMN_FORMATS = {col: '{:%s}' % width
                  for col, width in COLUMN_WIDTHS.items()}
# Columns that are only as wide as their own title get centered.
for col in COLUMN_FORMATS:
    if COLUMN_WIDTHS[col] == len(col):
        COLUMN_FORMATS[col] = COLUMN_FORMATS[col].replace(':', ':^')


def _parse_filters_arg(raw, error):
    """Parse a comma-separated filter list into Filter objects.

    *error* is called with a message for each filter that names an
    unknown column; such filters are never added to the result.
    """
    filters = []
    for value in raw.split(','):
        value = value.strip()
        if not value:
            continue
        flt = Filter.parse(value)
        if flt.name not in COLUMN_NAMES:
            # Report the offending item, and skip it in case error()
            # returns instead of exiting.
            error('bad filter {!r}: unsupported column {!r}'.format(
                value, flt.name))
            continue
        filters.append(flt)
    return filters


def _parse_columns_arg(raw, error):
    """Split a comma-separated column list, reporting unknown columns."""
    columns = raw.split(',')
    for column in columns:
        if column not in COLUMN_NAMES:
            error('unsupported column {!r}'.format(column))
    return columns


def _parse_sort_arg(raw, error):
    """Split a comma-separated sort spec, reporting unknown columns.

    Column names may carry leading underscores (see make_sort_key()).
    """
    sort = raw.split(',')
    for column in sort:
        if column.lstrip('_') not in COLUMN_NAMES:
            error('unsupported column {!r}'.format(column))
    return sort


def _parse_group_arg(raw, error):
    """Validate the --group value.

    An empty value is returned unchanged and ``-`` (documented in the
    option's help as "do not group") yields None.  Only grouping by
    ``filename`` is supported; anything else is reported via *error*.
    """
    if not raw:
        return raw
    if raw == '-':
        return None
    group = raw
    if group not in COLUMN_NAMES:
        error('unsupported column {!r}'.format(group))
    if group != 'filename':
        error('unsupported group {!r}'.format(group))
    return group


def parse_args(argv=None):
    """Parse the command line into a namespace of main() keyword arguments.

    *argv* defaults to ``sys.argv[1:]``.  The -v/-q counts are folded
    into a single ``verbosity`` value.
    """
    if argv is None:
        argv = sys.argv[1:]

    import argparse
    parser = argparse.ArgumentParser()

    parser.add_argument('-v', '--verbose', action='count', default=0)
    parser.add_argument('-q', '--quiet', action='count', default=0)

    parser.add_argument('--filters', default='-scope',
                        help='[[-]<COLUMN>[=<GLOB>]] ...')

    parser.add_argument('--columns', default=COLUMNS,
                        help='a comma-separated list of columns to show')
    parser.add_argument('--sort', default='filename,_name',
                        help='a comma-separated list of columns to sort')
    parser.add_argument('--group',
                        help='group by the given column name (- to not group)')

    parser.add_argument('--rc-on-match', dest='rc', type=int)

    parser.add_argument('filename', nargs='?', default=GLOBALS_FILE)

    args = parser.parse_args(argv)

    verbose = vars(args).pop('verbose', 0)
    quiet = vars(args).pop('quiet', 0)
    args.verbosity = max(0, VERBOSITY + verbose - quiet)

    # Sorting by filename first implies grouping by filename, unless the
    # user gave an explicit --group value.
    if args.sort.startswith('filename') and not args.group:
        args.group = 'filename'

    if args.rc is None:
        if '-scope=core' in args.filters or 'core' not in args.filters:
            args.rc = 0
        else:
            args.rc = 1

    args.filters = _parse_filters_arg(args.filters, parser.error)
    args.columns = _parse_columns_arg(args.columns, parser.error)
    args.sort = _parse_sort_arg(args.sort, parser.error)
    args.group = _parse_group_arg(args.group, parser.error)

    return args


def main(root=ROOT_DIR, filename=GLOBALS_FILE,
         filters=None, columns=COLUMN_NAMES, sort=None, group=None,
         verbosity=VERBOSITY, rc=1):
    """Print the table of matching variables and return an exit code.

    Returns *rc* (after printing an error to stderr) when at least one
    variable was listed and *rc* is non-zero; otherwise returns 0.  The
    table itself is only printed when *verbosity* is at least 2.
    """
    if verbosity >= 2:
        def log(msg):
            print(msg)
    else:
        def log(msg):
            pass

    allvars = (var
               for var in find_vars(root, filename)
               if filter_var(var, filters))
    if sort:
        allvars = sorted(allvars, key=make_sort_key(sort))

    if group:
        # Build a new list: *columns* may be the shared COLUMN_NAMES
        # default, which must not be modified.
        columns = [col for col in columns if col != group]
        grouped = make_groups(allvars, group)
        lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
    else:
        lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS)

    total = 0
    for line, count in lines:
        total += count
        log(line)
    log('\ntotal: {}'.format(total))

    if total and rc:
        print('ERROR: found unsafe globals', file=sys.stderr)
        return rc
    return 0


if __name__ == '__main__':
    args = parse_args()
    sys.exit(
        main(**vars(args)))