Home | History | Annotate | Download | only in distutils
      1 """text_file
      2 
      3 provides the TextFile class, which gives an interface to text files
      4 that (optionally) takes care of stripping comments, ignoring blank
      5 lines, and joining lines with backslashes."""
      6 
      7 import sys, io
      8 
      9 
     10 class TextFile:
     11     """Provides a file-like object that takes care of all the things you
     12        commonly want to do when processing a text file that has some
     13        line-by-line syntax: strip comments (as long as "#" is your
     14        comment character), skip blank lines, join adjacent lines by
     15        escaping the newline (ie. backslash at end of line), strip
     16        leading and/or trailing whitespace.  All of these are optional
     17        and independently controllable.
     18 
     19        Provides a 'warn()' method so you can generate warning messages that
     20        report physical line number, even if the logical line in question
     21        spans multiple physical lines.  Also provides 'unreadline()' for
     22        implementing line-at-a-time lookahead.
     23 
     24        Constructor is called as:
     25 
     26            TextFile (filename=None, file=None, **options)
     27 
     28        It bombs (RuntimeError) if both 'filename' and 'file' are None;
     29        'filename' should be a string, and 'file' a file object (or
     30        something that provides 'readline()' and 'close()' methods).  It is
     31        recommended that you supply at least 'filename', so that TextFile
     32        can include it in warning messages.  If 'file' is not supplied,
     33        TextFile creates its own using 'io.open()'.
     34 
     35        The options are all boolean, and affect the value returned by
     36        'readline()':
     37          strip_comments [default: true]
     38            strip from "#" to end-of-line, as well as any whitespace
     39            leading up to the "#" -- unless it is escaped by a backslash
     40          lstrip_ws [default: false]
     41            strip leading whitespace from each line before returning it
     42          rstrip_ws [default: true]
     43            strip trailing whitespace (including line terminator!) from
     44            each line before returning it
     45          skip_blanks [default: true}
     46            skip lines that are empty *after* stripping comments and
     47            whitespace.  (If both lstrip_ws and rstrip_ws are false,
     48            then some lines may consist of solely whitespace: these will
     49            *not* be skipped, even if 'skip_blanks' is true.)
     50          join_lines [default: false]
     51            if a backslash is the last non-newline character on a line
     52            after stripping comments and whitespace, join the following line
     53            to it to form one "logical line"; if N consecutive lines end
     54            with a backslash, then N+1 physical lines will be joined to
     55            form one logical line.
     56          collapse_join [default: false]
     57            strip leading whitespace from lines that are joined to their
     58            predecessor; only matters if (join_lines and not lstrip_ws)
     59          errors [default: 'strict']
     60            error handler used to decode the file content
     61 
     62        Note that since 'rstrip_ws' can strip the trailing newline, the
     63        semantics of 'readline()' must differ from those of the builtin file
     64        object's 'readline()' method!  In particular, 'readline()' returns
     65        None for end-of-file: an empty string might just be a blank line (or
     66        an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is
     67        not."""
     68 
     69     default_options = { 'strip_comments': 1,
     70                         'skip_blanks':    1,
     71                         'lstrip_ws':      0,
     72                         'rstrip_ws':      1,
     73                         'join_lines':     0,
     74                         'collapse_join':  0,
     75                         'errors':         'strict',
     76                       }
     77 
     78     def __init__(self, filename=None, file=None, **options):
     79         """Construct a new TextFile object.  At least one of 'filename'
     80            (a string) and 'file' (a file-like object) must be supplied.
     81            They keyword argument options are described above and affect
     82            the values returned by 'readline()'."""
     83         if filename is None and file is None:
     84             raise RuntimeError("you must supply either or both of 'filename' and 'file'")
     85 
     86         # set values for all options -- either from client option hash
     87         # or fallback to default_options
     88         for opt in self.default_options.keys():
     89             if opt in options:
     90                 setattr(self, opt, options[opt])
     91             else:
     92                 setattr(self, opt, self.default_options[opt])
     93 
     94         # sanity check client option hash
     95         for opt in options.keys():
     96             if opt not in self.default_options:
     97                 raise KeyError("invalid TextFile option '%s'" % opt)
     98 
     99         if file is None:
    100             self.open(filename)
    101         else:
    102             self.filename = filename
    103             self.file = file
    104             self.current_line = 0       # assuming that file is at BOF!
    105 
    106         # 'linebuf' is a stack of lines that will be emptied before we
    107         # actually read from the file; it's only populated by an
    108         # 'unreadline()' operation
    109         self.linebuf = []
    110 
    111     def open(self, filename):
    112         """Open a new file named 'filename'.  This overrides both the
    113            'filename' and 'file' arguments to the constructor."""
    114         self.filename = filename
    115         self.file = io.open(self.filename, 'r', errors=self.errors)
    116         self.current_line = 0
    117 
    118     def close(self):
    119         """Close the current file and forget everything we know about it
    120            (filename, current line number)."""
    121         file = self.file
    122         self.file = None
    123         self.filename = None
    124         self.current_line = None
    125         file.close()
    126 
    127     def gen_error(self, msg, line=None):
    128         outmsg = []
    129         if line is None:
    130             line = self.current_line
    131         outmsg.append(self.filename + ", ")
    132         if isinstance(line, (list, tuple)):
    133             outmsg.append("lines %d-%d: " % tuple(line))
    134         else:
    135             outmsg.append("line %d: " % line)
    136         outmsg.append(str(msg))
    137         return "".join(outmsg)
    138 
    139     def error(self, msg, line=None):
    140         raise ValueError("error: " + self.gen_error(msg, line))
    141 
    142     def warn(self, msg, line=None):
    143         """Print (to stderr) a warning message tied to the current logical
    144            line in the current file.  If the current logical line in the
    145            file spans multiple physical lines, the warning refers to the
    146            whole range, eg. "lines 3-5".  If 'line' supplied, it overrides
    147            the current line number; it may be a list or tuple to indicate a
    148            range of physical lines, or an integer for a single physical
    149            line."""
    150         sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n")
    151 
    152     def readline(self):
    153         """Read and return a single logical line from the current file (or
    154            from an internal buffer if lines have previously been "unread"
    155            with 'unreadline()').  If the 'join_lines' option is true, this
    156            may involve reading multiple physical lines concatenated into a
    157            single string.  Updates the current line number, so calling
    158            'warn()' after 'readline()' emits a warning about the physical
    159            line(s) just read.  Returns None on end-of-file, since the empty
    160            string can occur if 'rstrip_ws' is true but 'strip_blanks' is
    161            not."""
    162         # If any "unread" lines waiting in 'linebuf', return the top
    163         # one.  (We don't actually buffer read-ahead data -- lines only
    164         # get put in 'linebuf' if the client explicitly does an
    165         # 'unreadline()'.
    166         if self.linebuf:
    167             line = self.linebuf[-1]
    168             del self.linebuf[-1]
    169             return line
    170 
    171         buildup_line = ''
    172 
    173         while True:
    174             # read the line, make it None if EOF
    175             line = self.file.readline()
    176             if line == '':
    177                 line = None
    178 
    179             if self.strip_comments and line:
    180 
    181                 # Look for the first "#" in the line.  If none, never
    182                 # mind.  If we find one and it's the first character, or
    183                 # is not preceded by "\", then it starts a comment --
    184                 # strip the comment, strip whitespace before it, and
    185                 # carry on.  Otherwise, it's just an escaped "#", so
    186                 # unescape it (and any other escaped "#"'s that might be
    187                 # lurking in there) and otherwise leave the line alone.
    188 
    189                 pos = line.find("#")
    190                 if pos == -1: # no "#" -- no comments
    191                     pass
    192 
    193                 # It's definitely a comment -- either "#" is the first
    194                 # character, or it's elsewhere and unescaped.
    195                 elif pos == 0 or line[pos-1] != "\\":
    196                     # Have to preserve the trailing newline, because it's
    197                     # the job of a later step (rstrip_ws) to remove it --
    198                     # and if rstrip_ws is false, we'd better preserve it!
    199                     # (NB. this means that if the final line is all comment
    200                     # and has no trailing newline, we will think that it's
    201                     # EOF; I think that's OK.)
    202                     eol = (line[-1] == '\n') and '\n' or ''
    203                     line = line[0:pos] + eol
    204 
    205                     # If all that's left is whitespace, then skip line
    206                     # *now*, before we try to join it to 'buildup_line' --
    207                     # that way constructs like
    208                     #   hello \\
    209                     #   # comment that should be ignored
    210                     #   there
    211                     # result in "hello there".
    212                     if line.strip() == "":
    213                         continue
    214                 else: # it's an escaped "#"
    215                     line = line.replace("\\#", "#")
    216 
    217             # did previous line end with a backslash? then accumulate
    218             if self.join_lines and buildup_line:
    219                 # oops: end of file
    220                 if line is None:
    221                     self.warn("continuation line immediately precedes "
    222                               "end-of-file")
    223                     return buildup_line
    224 
    225                 if self.collapse_join:
    226                     line = line.lstrip()
    227                 line = buildup_line + line
    228 
    229                 # careful: pay attention to line number when incrementing it
    230                 if isinstance(self.current_line, list):
    231                     self.current_line[1] = self.current_line[1] + 1
    232                 else:
    233                     self.current_line = [self.current_line,
    234                                          self.current_line + 1]
    235             # just an ordinary line, read it as usual
    236             else:
    237                 if line is None: # eof
    238                     return None
    239 
    240                 # still have to be careful about incrementing the line number!
    241                 if isinstance(self.current_line, list):
    242                     self.current_line = self.current_line[1] + 1
    243                 else:
    244                     self.current_line = self.current_line + 1
    245 
    246             # strip whitespace however the client wants (leading and
    247             # trailing, or one or the other, or neither)
    248             if self.lstrip_ws and self.rstrip_ws:
    249                 line = line.strip()
    250             elif self.lstrip_ws:
    251                 line = line.lstrip()
    252             elif self.rstrip_ws:
    253                 line = line.rstrip()
    254 
    255             # blank line (whether we rstrip'ed or not)? skip to next line
    256             # if appropriate
    257             if (line == '' or line == '\n') and self.skip_blanks:
    258                 continue
    259 
    260             if self.join_lines:
    261                 if line[-1] == '\\':
    262                     buildup_line = line[:-1]
    263                     continue
    264 
    265                 if line[-2:] == '\\\n':
    266                     buildup_line = line[0:-2] + '\n'
    267                     continue
    268 
    269             # well, I guess there's some actual content there: return it
    270             return line
    271 
    272     def readlines(self):
    273         """Read and return the list of all logical lines remaining in the
    274            current file."""
    275         lines = []
    276         while True:
    277             line = self.readline()
    278             if line is None:
    279                 return lines
    280             lines.append(line)
    281 
    282     def unreadline(self, line):
    283         """Push 'line' (a string) onto an internal buffer that will be
    284            checked by future 'readline()' calls.  Handy for implementing
    285            a parser with line-at-a-time lookahead."""
    286         self.linebuf.append(line)
    287