1 """text_file 2 3 provides the TextFile class, which gives an interface to text files 4 that (optionally) takes care of stripping comments, ignoring blank 5 lines, and joining lines with backslashes.""" 6 7 __revision__ = "$Id$" 8 9 import sys 10 11 12 class TextFile: 13 14 """Provides a file-like object that takes care of all the things you 15 commonly want to do when processing a text file that has some 16 line-by-line syntax: strip comments (as long as "#" is your 17 comment character), skip blank lines, join adjacent lines by 18 escaping the newline (ie. backslash at end of line), strip 19 leading and/or trailing whitespace. All of these are optional 20 and independently controllable. 21 22 Provides a 'warn()' method so you can generate warning messages that 23 report physical line number, even if the logical line in question 24 spans multiple physical lines. Also provides 'unreadline()' for 25 implementing line-at-a-time lookahead. 26 27 Constructor is called as: 28 29 TextFile (filename=None, file=None, **options) 30 31 It bombs (RuntimeError) if both 'filename' and 'file' are None; 32 'filename' should be a string, and 'file' a file object (or 33 something that provides 'readline()' and 'close()' methods). It is 34 recommended that you supply at least 'filename', so that TextFile 35 can include it in warning messages. If 'file' is not supplied, 36 TextFile creates its own using the 'open()' builtin. 37 38 The options are all boolean, and affect the value returned by 39 'readline()': 40 strip_comments [default: true] 41 strip from "#" to end-of-line, as well as any whitespace 42 leading up to the "#" -- unless it is escaped by a backslash 43 lstrip_ws [default: false] 44 strip leading whitespace from each line before returning it 45 rstrip_ws [default: true] 46 strip trailing whitespace (including line terminator!) from 47 each line before returning it 48 skip_blanks [default: true} 49 skip lines that are empty *after* stripping comments and 50 whitespace. (If both lstrip_ws and rstrip_ws are false, 51 then some lines may consist of solely whitespace: these will 52 *not* be skipped, even if 'skip_blanks' is true.) 53 join_lines [default: false] 54 if a backslash is the last non-newline character on a line 55 after stripping comments and whitespace, join the following line 56 to it to form one "logical line"; if N consecutive lines end 57 with a backslash, then N+1 physical lines will be joined to 58 form one logical line. 59 collapse_join [default: false] 60 strip leading whitespace from lines that are joined to their 61 predecessor; only matters if (join_lines and not lstrip_ws) 62 63 Note that since 'rstrip_ws' can strip the trailing newline, the 64 semantics of 'readline()' must differ from those of the builtin file 65 object's 'readline()' method! In particular, 'readline()' returns 66 None for end-of-file: an empty string might just be a blank line (or 67 an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is 68 not.""" 69 70 default_options = { 'strip_comments': 1, 71 'skip_blanks': 1, 72 'lstrip_ws': 0, 73 'rstrip_ws': 1, 74 'join_lines': 0, 75 'collapse_join': 0, 76 } 77 78 def __init__ (self, filename=None, file=None, **options): 79 """Construct a new TextFile object. At least one of 'filename' 80 (a string) and 'file' (a file-like object) must be supplied. 81 They keyword argument options are described above and affect 82 the values returned by 'readline()'.""" 83 84 if filename is None and file is None: 85 raise RuntimeError, \ 86 "you must supply either or both of 'filename' and 'file'" 87 88 # set values for all options -- either from client option hash 89 # or fallback to default_options 90 for opt in self.default_options.keys(): 91 if opt in options: 92 setattr (self, opt, options[opt]) 93 94 else: 95 setattr (self, opt, self.default_options[opt]) 96 97 # sanity check client option hash 98 for opt in options.keys(): 99 if opt not in self.default_options: 100 raise KeyError, "invalid TextFile option '%s'" % opt 101 102 if file is None: 103 self.open (filename) 104 else: 105 self.filename = filename 106 self.file = file 107 self.current_line = 0 # assuming that file is at BOF! 108 109 # 'linebuf' is a stack of lines that will be emptied before we 110 # actually read from the file; it's only populated by an 111 # 'unreadline()' operation 112 self.linebuf = [] 113 114 115 def open (self, filename): 116 """Open a new file named 'filename'. This overrides both the 117 'filename' and 'file' arguments to the constructor.""" 118 119 self.filename = filename 120 self.file = open (self.filename, 'r') 121 self.current_line = 0 122 123 124 def close (self): 125 """Close the current file and forget everything we know about it 126 (filename, current line number).""" 127 128 self.file.close () 129 self.file = None 130 self.filename = None 131 self.current_line = None 132 133 134 def gen_error (self, msg, line=None): 135 outmsg = [] 136 if line is None: 137 line = self.current_line 138 outmsg.append(self.filename + ", ") 139 if isinstance(line, (list, tuple)): 140 outmsg.append("lines %d-%d: " % tuple (line)) 141 else: 142 outmsg.append("line %d: " % line) 143 outmsg.append(str(msg)) 144 return ''.join(outmsg) 145 146 147 def error (self, msg, line=None): 148 raise ValueError, "error: " + self.gen_error(msg, line) 149 150 def warn (self, msg, line=None): 151 """Print (to stderr) a warning message tied to the current logical 152 line in the current file. If the current logical line in the 153 file spans multiple physical lines, the warning refers to the 154 whole range, eg. "lines 3-5". If 'line' supplied, it overrides 155 the current line number; it may be a list or tuple to indicate a 156 range of physical lines, or an integer for a single physical 157 line.""" 158 sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n") 159 160 161 def readline (self): 162 """Read and return a single logical line from the current file (or 163 from an internal buffer if lines have previously been "unread" 164 with 'unreadline()'). If the 'join_lines' option is true, this 165 may involve reading multiple physical lines concatenated into a 166 single string. Updates the current line number, so calling 167 'warn()' after 'readline()' emits a warning about the physical 168 line(s) just read. Returns None on end-of-file, since the empty 169 string can occur if 'rstrip_ws' is true but 'strip_blanks' is 170 not.""" 171 172 # If any "unread" lines waiting in 'linebuf', return the top 173 # one. (We don't actually buffer read-ahead data -- lines only 174 # get put in 'linebuf' if the client explicitly does an 175 # 'unreadline()'. 176 if self.linebuf: 177 line = self.linebuf[-1] 178 del self.linebuf[-1] 179 return line 180 181 buildup_line = '' 182 183 while 1: 184 # read the line, make it None if EOF 185 line = self.file.readline() 186 if line == '': line = None 187 188 if self.strip_comments and line: 189 190 # Look for the first "#" in the line. If none, never 191 # mind. If we find one and it's the first character, or 192 # is not preceded by "\", then it starts a comment -- 193 # strip the comment, strip whitespace before it, and 194 # carry on. Otherwise, it's just an escaped "#", so 195 # unescape it (and any other escaped "#"'s that might be 196 # lurking in there) and otherwise leave the line alone. 197 198 pos = line.find("#") 199 if pos == -1: # no "#" -- no comments 200 pass 201 202 # It's definitely a comment -- either "#" is the first 203 # character, or it's elsewhere and unescaped. 204 elif pos == 0 or line[pos-1] != "\\": 205 # Have to preserve the trailing newline, because it's 206 # the job of a later step (rstrip_ws) to remove it -- 207 # and if rstrip_ws is false, we'd better preserve it! 208 # (NB. this means that if the final line is all comment 209 # and has no trailing newline, we will think that it's 210 # EOF; I think that's OK.) 211 eol = (line[-1] == '\n') and '\n' or '' 212 line = line[0:pos] + eol 213 214 # If all that's left is whitespace, then skip line 215 # *now*, before we try to join it to 'buildup_line' -- 216 # that way constructs like 217 # hello \\ 218 # # comment that should be ignored 219 # there 220 # result in "hello there". 221 if line.strip() == "": 222 continue 223 224 else: # it's an escaped "#" 225 line = line.replace("\\#", "#") 226 227 228 # did previous line end with a backslash? then accumulate 229 if self.join_lines and buildup_line: 230 # oops: end of file 231 if line is None: 232 self.warn ("continuation line immediately precedes " 233 "end-of-file") 234 return buildup_line 235 236 if self.collapse_join: 237 line = line.lstrip() 238 line = buildup_line + line 239 240 # careful: pay attention to line number when incrementing it 241 if isinstance(self.current_line, list): 242 self.current_line[1] = self.current_line[1] + 1 243 else: 244 self.current_line = [self.current_line, 245 self.current_line+1] 246 # just an ordinary line, read it as usual 247 else: 248 if line is None: # eof 249 return None 250 251 # still have to be careful about incrementing the line number! 252 if isinstance(self.current_line, list): 253 self.current_line = self.current_line[1] + 1 254 else: 255 self.current_line = self.current_line + 1 256 257 258 # strip whitespace however the client wants (leading and 259 # trailing, or one or the other, or neither) 260 if self.lstrip_ws and self.rstrip_ws: 261 line = line.strip() 262 elif self.lstrip_ws: 263 line = line.lstrip() 264 elif self.rstrip_ws: 265 line = line.rstrip() 266 267 # blank line (whether we rstrip'ed or not)? skip to next line 268 # if appropriate 269 if (line == '' or line == '\n') and self.skip_blanks: 270 continue 271 272 if self.join_lines: 273 if line[-1] == '\\': 274 buildup_line = line[:-1] 275 continue 276 277 if line[-2:] == '\\\n': 278 buildup_line = line[0:-2] + '\n' 279 continue 280 281 # well, I guess there's some actual content there: return it 282 return line 283 284 # readline () 285 286 287 def readlines (self): 288 """Read and return the list of all logical lines remaining in the 289 current file.""" 290 291 lines = [] 292 while 1: 293 line = self.readline() 294 if line is None: 295 return lines 296 lines.append (line) 297 298 299 def unreadline (self, line): 300 """Push 'line' (a string) onto an internal buffer that will be 301 checked by future 'readline()' calls. Handy for implementing 302 a parser with line-at-a-time lookahead.""" 303 304 self.linebuf.append (line) 305