"""Parse a Python module and describe its classes and methods.

Parse enough of a Python file to recognize imports and class and
method definitions, and to find out the superclasses of a class.

The interface consists of a single function:
        readmodule_ex(module [, path])
where module is the name of a Python module, and path is an optional
list of directories where the module is to be searched.  If present,
path is prepended to the system search path sys.path.  The return
value is a dictionary.  The keys of the dictionary are the names of
the classes and top-level functions defined in the module (including
classes that are defined via the from XXX import YYY construct).  The
values are instances of the classes Class and Function defined here.
One special key/value pair is present for packages: the key
'__path__' has a list as its value which contains the package search
path.

The older function readmodule(module [, path]) is kept for backwards
compatibility; it returns the same dictionary restricted to Class
instances.

A class is described by the class Class in this module.  Instances
of this class have the following instance variables:
        module -- the module name
        name -- the name of the class
        super -- a list of super classes (Class instances)
        methods -- a dictionary of methods
        file -- the file in which the class was defined
        lineno -- the line in the file on which the class statement occurred
The dictionary of methods uses the method names as keys and the line
numbers on which the method was defined as values.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.

A function is described by the class Function in this module.
Instances of this class have the following instance variables:
        module -- the module name
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line in the file on which the def statement occurred
"""

import io
import sys
import importlib.util
import tokenize
from token import NAME, DEDENT, OP

__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

_modules = {}                           # cache of modules we've seen


# each Python class is represented by an instance of this class
class Class:
    '''Class to represent a Python class.'''

    def __init__(self, module, name, super, file, lineno):
        # NOTE: the parameter name 'super' shadows the builtin but is part
        # of the public signature, so it is kept as is.
        self.module = module
        self.name = name
        if super is None:
            super = []
        self.super = super
        self.methods = {}               # method name -> line number
        self.file = file
        self.lineno = lineno

    def _addmethod(self, name, lineno):
        '''Record that method NAME is defined on line LINENO.'''
        self.methods[name] = lineno


class Function:
    '''Class to represent a top-level Python function'''

    def __init__(self, module, name, file, lineno):
        self.module = module
        self.name = name
        self.file = file
        self.lineno = lineno


def readmodule(module, path=None):
    '''Backwards compatible interface.

    Call readmodule_ex() and then only keep Class objects from the
    resulting dictionary.'''

    return {key: value
            for key, value in _readmodule(module, path or []).items()
            if isinstance(value, Class)}


def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary of classes.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class
    and each top-level function found in the module.

    Raise ImportError if the module cannot be found.
    '''
    return _readmodule(module, path or [])


def _readmodule(module, path, inpackage=None):
    '''Do the hard work for readmodule[_ex].

    If INPACKAGE is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and PATH is combined with sys.path.

    Return the dictionary describing the module (the same object is
    stored in the module cache).  Raise ImportError if the module, or a
    package it is supposed to live in, cannot be found.
    '''
    # Compute the full module name (prepending inpackage if set)
    if inpackage is not None:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents
    tree = {}

    # Check if it is a built-in module; we don't do much for these
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = "%s.%s" % (inpackage, package)
        if '__path__' not in parent:
            raise ImportError('No package named {}'.format(package))
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module
    if inpackage is not None:
        search_path = path
    else:
        # list() so that a tuple (or other sequence) is accepted as PATH
        search_path = list(path) + sys.path
    # NOTE: _find_spec_from_path is a private importlib helper.
    spec = importlib.util._find_spec_from_path(fullmodule, search_path)
    if spec is None:
        # Report a missing module as ImportError (instead of failing with
        # AttributeError on the None spec below) so that callers -- in
        # particular the 'import' handling further down, which retries as
        # a top-level module on ImportError -- can react to it.
        raise ImportError('No module named {!r}'.format(fullmodule),
                          name=fullmodule)
    # Cache before parsing so that circular imports terminate.
    _modules[fullmodule] = tree
    # is module a package?
    if spec.submodule_search_locations is not None:
        tree['__path__'] = spec.submodule_search_locations
    try:
        source = spec.loader.get_source(fullmodule)
        if source is None:
            return tree
    except (AttributeError, ImportError):
        # not Python source, can't do anything with this module
        return tree

    fname = spec.loader.get_filename(fullmodule)

    f = io.StringIO(source)

    stack = []  # stack of (class, indent) pairs

    g = tokenize.generate_tokens(f.readline)
    try:
        for tokentype, token, start, _end, _line in g:
            if tokentype == DEDENT:
                lineno, thisindent = start
                # close nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
            elif token == 'def':
                lineno, thisindent = start
                # close previous nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, meth_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue  # Syntax error
                if stack:
                    cur_class = stack[-1][0]
                    if isinstance(cur_class, Class):
                        # it's a method
                        cur_class._addmethod(meth_name, lineno)
                    # else it's a nested def
                else:
                    # it's a function
                    tree[meth_name] = Function(fullmodule, meth_name,
                                               fname, lineno)
                stack.append((None, thisindent))  # Marker for nested fns
            elif token == 'class':
                lineno, thisindent = start
                # close previous nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, class_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue  # Syntax error
                # parse what follows the class name
                tokentype, token, start = next(g)[0:3]
                inherit = None
                if token == '(':
                    names = []  # List of superclasses
                    # there's a list of superclasses
                    level = 1
                    base_parts = []  # Tokens making up current superclass
                    while True:
                        tokentype, token, start = next(g)[0:3]
                        if token in (')', ',') and level == 1:
                            n = "".join(base_parts)
                            if n in tree:
                                # we know this super class
                                n = tree[n]
                            else:
                                c = n.split('.')
                                if len(c) > 1:
                                    # super class is of the form
                                    # module.class: look in module for
                                    # class
                                    m = c[-2]
                                    c = c[-1]
                                    if m in _modules:
                                        d = _modules[m]
                                        if c in d:
                                            n = d[c]
                            names.append(n)
                            base_parts = []
                        if token == '(':
                            level += 1
                        elif token == ')':
                            level -= 1
                            if level == 0:
                                break
                        elif token == ',' and level == 1:
                            pass
                        # only use NAME and OP (== dot) tokens for type name
                        elif tokentype in (NAME, OP) and level == 1:
                            base_parts.append(token)
                        # expressions in the base list are not supported
                    inherit = names
                cur_class = Class(fullmodule, class_name, inherit,
                                  fname, lineno)
                if not stack:
                    tree[class_name] = cur_class
                stack.append((cur_class, thisindent))
            elif token == 'import' and start[1] == 0:
                modules = _getnamelist(g)
                for mod, _mod2 in modules:
                    try:
                        # Recursively read the imported module
                        if inpackage is None:
                            _readmodule(mod, path)
                        else:
                            try:
                                _readmodule(mod, path, inpackage)
                            except ImportError:
                                # not a sibling in the package: retry as
                                # a top-level module
                                _readmodule(mod, [])
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.  (KeyboardInterrupt
                        # and SystemExit are deliberately not swallowed.)
                        pass
            elif token == 'from' and start[1] == 0:
                mod, token = _getname(g)
                if not mod or token != "import":
                    continue
                names = _getnamelist(g)
                try:
                    # Recursively read the imported module
                    d = _readmodule(mod, path, inpackage)
                except Exception:
                    # If we can't find or parse the imported module,
                    # too bad -- don't die here.
                    continue
                # add any classes that were defined in the imported module
                # to our name space if they were mentioned in the list
                for n, n2 in names:
                    if n in d:
                        tree[n2 or n] = d[n]
                    elif n == '*':
                        # don't add names that start with _
                        for n in d:
                            if n[0] != '_':
                                tree[n] = d[n]
    except StopIteration:
        # next(g) ran off the end of the token stream in mid-statement
        pass
    finally:
        f.close()

    return tree


def _getnamelist(g):
    # Helper to get a comma-separated list of dotted names plus 'as'
    # clauses.  Return a list of pairs (name, name2) where name2 is
    # the 'as' name, or None if there is no 'as' clause.
    names = []
    while True:
        name, token = _getname(g)
        if not name:
            break
        if token == 'as':
            name2, token = _getname(g)
        else:
            name2 = None
        names.append((name, name2))
        # skip ahead to the next comma or the end of the line
        while token != "," and "\n" not in token:
            token = next(g)[1]
        if token != ",":
            break
    return names


def _getname(g):
    # Helper to get a dotted name, return a pair (name, token) where
    # name is the dotted name, or None if there was no dotted name,
    # and token is the next input token.
    parts = []
    tokentype, token = next(g)[0:2]
    if tokentype != NAME and token != '*':
        return (None, token)
    parts.append(token)
    while True:
        tokentype, token = next(g)[0:2]
        if token != '.':
            break
        tokentype, token = next(g)[0:2]
        if tokentype != NAME:
            break
        parts.append(token)
    return (".".join(parts), token)


def _main():
    # Main program for testing.
    import os
    from operator import itemgetter
    mod = sys.argv[1]
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)
    # the '__path__' entry (a list) has no lineno, hence the default of 0
    objs = sorted(tree.values(), key=lambda a: getattr(a, 'lineno', 0))
    for obj in objs:
        if isinstance(obj, Class):
            print("class", obj.name, obj.super, obj.lineno)
            methods = sorted(obj.methods.items(), key=itemgetter(1))
            for name, lineno in methods:
                if name != "__path__":
                    print("  def", name, lineno)
        elif isinstance(obj, Function):
            print("def", obj.name, obj.lineno)


if __name__ == "__main__":
    _main()