"""Parse a Python module and describe its classes and functions.

Parse enough of a Python file to recognize imports and class and
method definitions, and to find out the superclasses of a class.

The interface consists of two functions:
        readmodule_ex(module [, path])
        readmodule(module [, path])
where module is the name of a Python module, and path is an optional
list of directories where the module is to be searched.  If present,
path is prepended to the system search path sys.path.  The return
value is a dictionary.  The keys of the dictionary are the names of
the classes and top-level functions defined in the module (including
names that are defined via the from XXX import YYY construct).  The
values are instances of the classes Class and Function defined here.
readmodule() is the backwards compatible variant that only keeps the
Class instances.  One special key/value pair is present for packages:
the key '__path__' has a list as its value which contains the package
search path.

A class is described by the class Class in this module.  Instances
of this class have the following instance variables:
        module -- the module name
        name -- the name of the class
        super -- a list of super classes (Class instances)
        methods -- a dictionary of methods
        file -- the file in which the class was defined
        lineno -- the line in the file on which the class statement occurred
The dictionary of methods uses the method names as keys and the line
numbers on which the method was defined as values.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.

A function is described by the class Function in this module.
Instances of this class have the following instance variables:
        module -- the module name
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line in the file on which the def statement occurred
"""

import sys
try:
    import imp
except ImportError:
    # The imp module does not exist on newer Python 3 releases;
    # _find_module() falls back to importlib when it is missing.
    imp = None
import tokenize
from token import NAME, DEDENT, OP
from operator import itemgetter

__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

_modules = {}                           # cache of modules we've seen

# Kinds of module reported by _find_module()
_PY_SOURCE = 'source'
_PKG_DIRECTORY = 'package'
_OTHER = 'other'


# each Python class is represented by an instance of this class
class Class:
    '''Class to represent a Python class.'''

    def __init__(self, module, name, super, file, lineno):
        # NOTE: the parameter is called 'super' for backwards
        # compatibility, even though it hides the builtin here.
        self.module = module
        self.name = name
        if super is None:
            super = []
        self.super = super
        self.methods = {}
        self.file = file
        self.lineno = lineno

    def _addmethod(self, name, lineno):
        '''Record that method NAME is defined on line LINENO.'''
        self.methods[name] = lineno


class Function:
    '''Class to represent a top-level Python function'''

    def __init__(self, module, name, file, lineno):
        self.module = module
        self.name = name
        self.file = file
        self.lineno = lineno


def readmodule(module, path=None):
    '''Backwards compatible interface.

    Call readmodule_ex() and then only keep Class objects from the
    resulting dictionary.'''

    res = {}
    for key, value in _readmodule(module, path or []).items():
        if isinstance(value, Class):
            res[key] = value
    return res


def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary of classes.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class
    and each top-level function found in the module.
    '''
    return _readmodule(module, path or [])


def _find_module(name, path):
    '''Locate module NAME in the list of directories PATH.

    Return a triple (file, filename, kind).  KIND is one of
    _PY_SOURCE, _PKG_DIRECTORY or _OTHER.  FILE is an open file for
    Python source; it is None for packages and may be None for other
    kinds of module.  Raise ImportError if the module is not found.
    '''
    if imp is not None:
        f, fname, (_suffix, _mode, ty) = imp.find_module(name, path)
        if ty == imp.PY_SOURCE:
            return f, fname, _PY_SOURCE
        if ty == imp.PKG_DIRECTORY:
            return f, fname, _PKG_DIRECTORY
        return f, fname, _OTHER

    # No imp module: ask importlib's path based finder instead.
    from importlib import machinery
    spec = machinery.PathFinder.find_spec(name, path)
    if spec is None:
        raise ImportError('No module named {}'.format(name))
    locations = list(spec.submodule_search_locations or ())
    if locations:
        return None, locations[0], _PKG_DIRECTORY
    if isinstance(spec.loader, machinery.SourceFileLoader):
        # tokenize.open() honours the source file's encoding declaration
        return tokenize.open(spec.origin), spec.origin, _PY_SOURCE
    return None, spec.origin, _OTHER


def _resolve_base(name, tree):
    '''Return the Class object for superclass NAME, or NAME itself.

    NAME is looked up in TREE (the names known in the module being
    parsed) and, for a dotted name, in the cache of modules that have
    already been read.
    '''
    if name in tree:
        # we know this super class
        return tree[name]
    parts = name.split('.')
    if len(parts) > 1:
        # super class is of the form module.class: look in module
        # for class.  The cache is keyed by the full dotted module
        # name, so try that before the last component on its own.
        classname = parts[-1]
        for modname in ('.'.join(parts[:-1]), parts[-2]):
            known = _modules.get(modname)
            if known is not None and classname in known:
                return known[classname]
    return name


def _readmodule(module, path, inpackage=None):
    '''Do the hard work for readmodule[_ex].

    If INPACKAGE is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and PATH is combined with sys.path.
    '''
    # Compute the full module name (prepending inpackage if set)
    if inpackage is not None:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dictionary for this module's contents
    tree = {}

    # Check if it is a built-in module; we don't do much for these
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = "%s.%s" % (inpackage, package)
        if '__path__' not in parent:
            raise ImportError('No package named {}'.format(package))
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module
    if inpackage is not None:
        f, fname, kind = _find_module(module, path)
    else:
        f, fname, kind = _find_module(module, path + sys.path)
    if kind == _PKG_DIRECTORY:
        tree['__path__'] = [fname]
        path = [fname] + path
        f, fname, kind = _find_module('__init__', [fname])
    # Cache the (still empty) result before parsing so that circular
    # imports find it instead of recursing forever.
    _modules[fullmodule] = tree
    if kind != _PY_SOURCE:
        # not Python source, can't do anything with this module
        if f is not None:
            f.close()
        return tree

    stack = []                          # stack of (class, indent) pairs

    g = tokenize.generate_tokens(f.readline)
    try:
        for tokentype, token, start, _end, _line in g:
            if tokentype == DEDENT:
                thisindent = start[1]
                # close nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
            elif token == 'def':
                lineno, thisindent = start
                # close previous nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, meth_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue            # Syntax error
                if stack:
                    cur_class = stack[-1][0]
                    if isinstance(cur_class, Class):
                        # it's a method
                        cur_class._addmethod(meth_name, lineno)
                    # else it's a nested def
                else:
                    # it's a function
                    tree[meth_name] = Function(fullmodule, meth_name,
                                               fname, lineno)
                stack.append((None, thisindent))  # Marker for nested fns
            elif token == 'class':
                lineno, thisindent = start
                # close previous nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, class_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue            # Syntax error
                # parse what follows the class name
                tokentype, token, start = next(g)[0:3]
                inherit = None
                if token == '(':
                    # there's a list of superclasses
                    names = []          # List of superclasses
                    level = 1           # Parenthesis nesting depth
                    base_tokens = []    # Tokens making up current superclass
                    while True:
                        tokentype, token, start = next(g)[0:3]
                        if token in (')', ',') and level == 1:
                            base = "".join(base_tokens)
                            # "class C():" and a trailing comma yield
                            # an empty name, which is not a superclass
                            if base:
                                names.append(_resolve_base(base, tree))
                            base_tokens = []
                        if token == '(':
                            level += 1
                        elif token == ')':
                            level -= 1
                            if level == 0:
                                break
                        elif token == ',' and level == 1:
                            pass
                        # only use NAME and OP (== dot) tokens for type name
                        elif tokentype in (NAME, OP) and level == 1:
                            base_tokens.append(token)
                        # expressions in the base list are not supported
                    inherit = names
                cur_class = Class(fullmodule, class_name, inherit,
                                  fname, lineno)
                if not stack:
                    tree[class_name] = cur_class
                stack.append((cur_class, thisindent))
            elif token == 'import' and start[1] == 0:
                modules = _getnamelist(g)
                for mod, _mod2 in modules:
                    try:
                        # Recursively read the imported module
                        if inpackage is None:
                            _readmodule(mod, path)
                        else:
                            try:
                                _readmodule(mod, path, inpackage)
                            except ImportError:
                                _readmodule(mod, [])
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.  KeyboardInterrupt
                        # and SystemExit are deliberately let through.
                        pass
            elif token == 'from' and start[1] == 0:
                mod, token = _getname(g)
                if not mod or token != "import":
                    continue
                names = _getnamelist(g)
                try:
                    # Recursively read the imported module
                    d = _readmodule(mod, path, inpackage)
                except Exception:
                    # If we can't find or parse the imported module,
                    # too bad -- don't die here.
                    continue
                # add any classes that were defined in the imported module
                # to our name space if they were mentioned in the list
                for n, n2 in names:
                    if n in d:
                        tree[n2 or n] = d[n]
                    elif n == '*':
                        # don't add names that start with _
                        for n in d:
                            if n[0] != '_':
                                tree[n] = d[n]
    except StopIteration:
        # The helpers pull tokens with next(); running out of tokens
        # in the middle of a statement simply ends the scan.
        pass
    finally:
        # Close the source file even when the tokenizer raises
        f.close()

    return tree


def _getnamelist(g):
    # Helper to get a comma-separated list of dotted names plus 'as'
    # clauses.  Return a list of pairs (name, name2) where name2 is
    # the 'as' name, or None if there is no 'as' clause.
    names = []
    while True:
        name, token = _getname(g)
        if not name:
            break
        if token == 'as':
            name2, token = _getname(g)
        else:
            name2 = None
        names.append((name, name2))
        # skip anything up to the next comma or the end of the line
        while token != "," and "\n" not in token:
            token = next(g)[1]
        if token != ",":
            break
    return names


def _getname(g):
    # Helper to get a dotted name, return a pair (name, token) where
    # name is the dotted name, or None if there was no dotted name,
    # and token is the next input token.
    parts = []
    tokentype, token = next(g)[0:2]
    if tokentype != NAME and token != '*':
        return (None, token)
    parts.append(token)
    while True:
        tokentype, token = next(g)[0:2]
        if token != '.':
            break
        tokentype, token = next(g)[0:2]
        if tokentype != NAME:
            break
        parts.append(token)
    return (".".join(parts), token)


def _main():
    # Main program for testing.
    import os
    mod = sys.argv[1]
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)
    # The '__path__' entry of a package is a list without a lineno,
    # hence the default of 0.
    objs = sorted(tree.values(), key=lambda obj: getattr(obj, 'lineno', 0))
    for obj in objs:
        if isinstance(obj, Class):
            print("class %s %s %s" % (obj.name, obj.super, obj.lineno))
            methods = sorted(obj.methods.items(), key=itemgetter(1))
            for name, lineno in methods:
                if name != "__path__":
                    print("  def %s %s" % (name, lineno))
        elif isinstance(obj, Function):
            print("def %s %s" % (obj.name, obj.lineno))


if __name__ == "__main__":
    _main()