Home | History | Annotate | Download | only in distutils
      1 """text_file
      2 
      3 provides the TextFile class, which gives an interface to text files
      4 that (optionally) takes care of stripping comments, ignoring blank
      5 lines, and joining lines with backslashes."""
      6 
      7 __revision__ = "$Id$"
      8 
      9 import sys
     10 
     11 
     12 class TextFile:
     13 
     14     """Provides a file-like object that takes care of all the things you
     15        commonly want to do when processing a text file that has some
     16        line-by-line syntax: strip comments (as long as "#" is your
     17        comment character), skip blank lines, join adjacent lines by
     18        escaping the newline (ie. backslash at end of line), strip
     19        leading and/or trailing whitespace.  All of these are optional
     20        and independently controllable.
     21 
     22        Provides a 'warn()' method so you can generate warning messages that
     23        report physical line number, even if the logical line in question
     24        spans multiple physical lines.  Also provides 'unreadline()' for
     25        implementing line-at-a-time lookahead.
     26 
     27        Constructor is called as:
     28 
     29            TextFile (filename=None, file=None, **options)
     30 
     31        It bombs (RuntimeError) if both 'filename' and 'file' are None;
     32        'filename' should be a string, and 'file' a file object (or
     33        something that provides 'readline()' and 'close()' methods).  It is
     34        recommended that you supply at least 'filename', so that TextFile
     35        can include it in warning messages.  If 'file' is not supplied,
     36        TextFile creates its own using the 'open()' builtin.
     37 
     38        The options are all boolean, and affect the value returned by
     39        'readline()':
     40          strip_comments [default: true]
     41            strip from "#" to end-of-line, as well as any whitespace
     42            leading up to the "#" -- unless it is escaped by a backslash
     43          lstrip_ws [default: false]
     44            strip leading whitespace from each line before returning it
     45          rstrip_ws [default: true]
     46            strip trailing whitespace (including line terminator!) from
     47            each line before returning it
     48          skip_blanks [default: true}
     49            skip lines that are empty *after* stripping comments and
     50            whitespace.  (If both lstrip_ws and rstrip_ws are false,
     51            then some lines may consist of solely whitespace: these will
     52            *not* be skipped, even if 'skip_blanks' is true.)
     53          join_lines [default: false]
     54            if a backslash is the last non-newline character on a line
     55            after stripping comments and whitespace, join the following line
     56            to it to form one "logical line"; if N consecutive lines end
     57            with a backslash, then N+1 physical lines will be joined to
     58            form one logical line.
     59          collapse_join [default: false]
     60            strip leading whitespace from lines that are joined to their
     61            predecessor; only matters if (join_lines and not lstrip_ws)
     62 
     63        Note that since 'rstrip_ws' can strip the trailing newline, the
     64        semantics of 'readline()' must differ from those of the builtin file
     65        object's 'readline()' method!  In particular, 'readline()' returns
     66        None for end-of-file: an empty string might just be a blank line (or
     67        an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is
     68        not."""
     69 
     70     default_options = { 'strip_comments': 1,
     71                         'skip_blanks':    1,
     72                         'lstrip_ws':      0,
     73                         'rstrip_ws':      1,
     74                         'join_lines':     0,
     75                         'collapse_join':  0,
     76                       }
     77 
     78     def __init__ (self, filename=None, file=None, **options):
     79         """Construct a new TextFile object.  At least one of 'filename'
     80            (a string) and 'file' (a file-like object) must be supplied.
     81            They keyword argument options are described above and affect
     82            the values returned by 'readline()'."""
     83 
     84         if filename is None and file is None:
     85             raise RuntimeError, \
     86                   "you must supply either or both of 'filename' and 'file'"
     87 
     88         # set values for all options -- either from client option hash
     89         # or fallback to default_options
     90         for opt in self.default_options.keys():
     91             if opt in options:
     92                 setattr (self, opt, options[opt])
     93 
     94             else:
     95                 setattr (self, opt, self.default_options[opt])
     96 
     97         # sanity check client option hash
     98         for opt in options.keys():
     99             if opt not in self.default_options:
    100                 raise KeyError, "invalid TextFile option '%s'" % opt
    101 
    102         if file is None:
    103             self.open (filename)
    104         else:
    105             self.filename = filename
    106             self.file = file
    107             self.current_line = 0       # assuming that file is at BOF!
    108 
    109         # 'linebuf' is a stack of lines that will be emptied before we
    110         # actually read from the file; it's only populated by an
    111         # 'unreadline()' operation
    112         self.linebuf = []
    113 
    114 
    115     def open (self, filename):
    116         """Open a new file named 'filename'.  This overrides both the
    117            'filename' and 'file' arguments to the constructor."""
    118 
    119         self.filename = filename
    120         self.file = open (self.filename, 'r')
    121         self.current_line = 0
    122 
    123 
    124     def close (self):
    125         """Close the current file and forget everything we know about it
    126            (filename, current line number)."""
    127 
    128         self.file.close ()
    129         self.file = None
    130         self.filename = None
    131         self.current_line = None
    132 
    133 
    134     def gen_error (self, msg, line=None):
    135         outmsg = []
    136         if line is None:
    137             line = self.current_line
    138         outmsg.append(self.filename + ", ")
    139         if isinstance(line, (list, tuple)):
    140             outmsg.append("lines %d-%d: " % tuple (line))
    141         else:
    142             outmsg.append("line %d: " % line)
    143         outmsg.append(str(msg))
    144         return ''.join(outmsg)
    145 
    146 
    147     def error (self, msg, line=None):
    148         raise ValueError, "error: " + self.gen_error(msg, line)
    149 
    150     def warn (self, msg, line=None):
    151         """Print (to stderr) a warning message tied to the current logical
    152            line in the current file.  If the current logical line in the
    153            file spans multiple physical lines, the warning refers to the
    154            whole range, eg. "lines 3-5".  If 'line' supplied, it overrides
    155            the current line number; it may be a list or tuple to indicate a
    156            range of physical lines, or an integer for a single physical
    157            line."""
    158         sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n")
    159 
    160 
    161     def readline (self):
    162         """Read and return a single logical line from the current file (or
    163            from an internal buffer if lines have previously been "unread"
    164            with 'unreadline()').  If the 'join_lines' option is true, this
    165            may involve reading multiple physical lines concatenated into a
    166            single string.  Updates the current line number, so calling
    167            'warn()' after 'readline()' emits a warning about the physical
    168            line(s) just read.  Returns None on end-of-file, since the empty
    169            string can occur if 'rstrip_ws' is true but 'strip_blanks' is
    170            not."""
    171 
    172         # If any "unread" lines waiting in 'linebuf', return the top
    173         # one.  (We don't actually buffer read-ahead data -- lines only
    174         # get put in 'linebuf' if the client explicitly does an
    175         # 'unreadline()'.
    176         if self.linebuf:
    177             line = self.linebuf[-1]
    178             del self.linebuf[-1]
    179             return line
    180 
    181         buildup_line = ''
    182 
    183         while 1:
    184             # read the line, make it None if EOF
    185             line = self.file.readline()
    186             if line == '': line = None
    187 
    188             if self.strip_comments and line:
    189 
    190                 # Look for the first "#" in the line.  If none, never
    191                 # mind.  If we find one and it's the first character, or
    192                 # is not preceded by "\", then it starts a comment --
    193                 # strip the comment, strip whitespace before it, and
    194                 # carry on.  Otherwise, it's just an escaped "#", so
    195                 # unescape it (and any other escaped "#"'s that might be
    196                 # lurking in there) and otherwise leave the line alone.
    197 
    198                 pos = line.find("#")
    199                 if pos == -1:           # no "#" -- no comments
    200                     pass
    201 
    202                 # It's definitely a comment -- either "#" is the first
    203                 # character, or it's elsewhere and unescaped.
    204                 elif pos == 0 or line[pos-1] != "\\":
    205                     # Have to preserve the trailing newline, because it's
    206                     # the job of a later step (rstrip_ws) to remove it --
    207                     # and if rstrip_ws is false, we'd better preserve it!
    208                     # (NB. this means that if the final line is all comment
    209                     # and has no trailing newline, we will think that it's
    210                     # EOF; I think that's OK.)
    211                     eol = (line[-1] == '\n') and '\n' or ''
    212                     line = line[0:pos] + eol
    213 
    214                     # If all that's left is whitespace, then skip line
    215                     # *now*, before we try to join it to 'buildup_line' --
    216                     # that way constructs like
    217                     #   hello \\
    218                     #   # comment that should be ignored
    219                     #   there
    220                     # result in "hello there".
    221                     if line.strip() == "":
    222                         continue
    223 
    224                 else:                   # it's an escaped "#"
    225                     line = line.replace("\\#", "#")
    226 
    227 
    228             # did previous line end with a backslash? then accumulate
    229             if self.join_lines and buildup_line:
    230                 # oops: end of file
    231                 if line is None:
    232                     self.warn ("continuation line immediately precedes "
    233                                "end-of-file")
    234                     return buildup_line
    235 
    236                 if self.collapse_join:
    237                     line = line.lstrip()
    238                 line = buildup_line + line
    239 
    240                 # careful: pay attention to line number when incrementing it
    241                 if isinstance(self.current_line, list):
    242                     self.current_line[1] = self.current_line[1] + 1
    243                 else:
    244                     self.current_line = [self.current_line,
    245                                          self.current_line+1]
    246             # just an ordinary line, read it as usual
    247             else:
    248                 if line is None:        # eof
    249                     return None
    250 
    251                 # still have to be careful about incrementing the line number!
    252                 if isinstance(self.current_line, list):
    253                     self.current_line = self.current_line[1] + 1
    254                 else:
    255                     self.current_line = self.current_line + 1
    256 
    257 
    258             # strip whitespace however the client wants (leading and
    259             # trailing, or one or the other, or neither)
    260             if self.lstrip_ws and self.rstrip_ws:
    261                 line = line.strip()
    262             elif self.lstrip_ws:
    263                 line = line.lstrip()
    264             elif self.rstrip_ws:
    265                 line = line.rstrip()
    266 
    267             # blank line (whether we rstrip'ed or not)? skip to next line
    268             # if appropriate
    269             if (line == '' or line == '\n') and self.skip_blanks:
    270                 continue
    271 
    272             if self.join_lines:
    273                 if line[-1] == '\\':
    274                     buildup_line = line[:-1]
    275                     continue
    276 
    277                 if line[-2:] == '\\\n':
    278                     buildup_line = line[0:-2] + '\n'
    279                     continue
    280 
    281             # well, I guess there's some actual content there: return it
    282             return line
    283 
    284     # readline ()
    285 
    286 
    287     def readlines (self):
    288         """Read and return the list of all logical lines remaining in the
    289            current file."""
    290 
    291         lines = []
    292         while 1:
    293             line = self.readline()
    294             if line is None:
    295                 return lines
    296             lines.append (line)
    297 
    298 
    299     def unreadline (self, line):
    300         """Push 'line' (a string) onto an internal buffer that will be
    301            checked by future 'readline()' calls.  Handy for implementing
    302            a parser with line-at-a-time lookahead."""
    303 
    304         self.linebuf.append (line)
    305