Home | History | Annotate | Download | only in Lib
      1 """Conversion pipeline templates.
      2 
      3 The problem:
      4 ------------
      5 
      6 Suppose you have some data that you want to convert to another format,
      7 such as from GIF image format to PPM image format.  Maybe the
      8 conversion involves several steps (e.g. piping it through compress or
      9 uuencode).  Some of the conversion steps may require that their input
     10 is a disk file, others may be able to read standard input; similar for
     11 their output.  The input to the entire conversion may also be read
     12 from a disk file or from an open file, and similar for its output.
     13 
     14 The module lets you construct a pipeline template by sticking one or
     15 more conversion steps together.  It will take care of creating and
     16 removing temporary files if they are necessary to hold intermediate
     17 data.  You can then use the template to do conversions from many
     18 different sources to many different destinations.  The temporary
     19 file names used are different each time the template is used.
     20 
     21 The templates are objects so you can create templates for many
     22 different conversion steps and store them in a dictionary, for
     23 instance.
     24 
     25 
     26 Directions:
     27 -----------
     28 
     29 To create a template:
     30     t = Template()
     31 
     32 To add a conversion step to a template:
     33    t.append(command, kind)
     34 where kind is a string of two characters: the first is '-' if the
     35 command reads its standard input or 'f' if it requires a file; the
     36 second likewise for the output. The command must be valid /bin/sh
     37 syntax.  If input or output files are required, they are passed as
     38 $IN and $OUT; otherwise, it must be  possible to use the command in
     39 a pipeline.
     40 
     41 To add a conversion step at the beginning:
     42    t.prepend(command, kind)
     43 
     44 To convert a file to another file using a template:
     45   sts = t.copy(infile, outfile)
     46 If infile or outfile are the empty string, standard input is read or
     47 standard output is written, respectively.  The return value is the
     48 exit status of the conversion pipeline.
     49 
     50 To open a file for reading or writing through a conversion pipeline:
     51    fp = t.open(file, mode)
     52 where mode is 'r' to read the file, or 'w' to write it -- just like
     53 for the built-in function open() or for os.popen().
     54 
     55 To create a new template object initialized to a given one:
     56    t2 = t.clone()
     57 """                                     # '
     58 
     59 
     60 import re
     61 import os
     62 import tempfile
     63 # we import the quote function rather than the module for backward compat
     64 # (quote used to be an undocumented but used function in pipes)
     65 from shlex import quote
     66 
     67 __all__ = ["Template"]
     68 
     69 # Conversion step kinds
     70 
     71 FILEIN_FILEOUT = 'ff'                   # Must read & write real files
     72 STDIN_FILEOUT  = '-f'                   # Must write a real file
     73 FILEIN_STDOUT  = 'f-'                   # Must read a real file
     74 STDIN_STDOUT   = '--'                   # Normal pipeline element
     75 SOURCE         = '.-'                   # Must be first, writes stdout
     76 SINK           = '-.'                   # Must be last, reads stdin
     77 
     78 stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT, \
     79              SOURCE, SINK]
     80 
     81 
     82 class Template:
     83     """Class representing a pipeline template."""
     84 
     85     def __init__(self):
     86         """Template() returns a fresh pipeline template."""
     87         self.debugging = 0
     88         self.reset()
     89 
     90     def __repr__(self):
     91         """t.__repr__() implements repr(t)."""
     92         return '<Template instance, steps=%r>' % (self.steps,)
     93 
     94     def reset(self):
     95         """t.reset() restores a pipeline template to its initial state."""
     96         self.steps = []
     97 
     98     def clone(self):
     99         """t.clone() returns a new pipeline template with identical
    100         initial state as the current one."""
    101         t = Template()
    102         t.steps = self.steps[:]
    103         t.debugging = self.debugging
    104         return t
    105 
    106     def debug(self, flag):
    107         """t.debug(flag) turns debugging on or off."""
    108         self.debugging = flag
    109 
    110     def append(self, cmd, kind):
    111         """t.append(cmd, kind) adds a new step at the end."""
    112         if type(cmd) is not type(''):
    113             raise TypeError('Template.append: cmd must be a string')
    114         if kind not in stepkinds:
    115             raise ValueError('Template.append: bad kind %r' % (kind,))
    116         if kind == SOURCE:
    117             raise ValueError('Template.append: SOURCE can only be prepended')
    118         if self.steps and self.steps[-1][1] == SINK:
    119             raise ValueError('Template.append: already ends with SINK')
    120         if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
    121             raise ValueError('Template.append: missing $IN in cmd')
    122         if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
    123             raise ValueError('Template.append: missing $OUT in cmd')
    124         self.steps.append((cmd, kind))
    125 
    126     def prepend(self, cmd, kind):
    127         """t.prepend(cmd, kind) adds a new step at the front."""
    128         if type(cmd) is not type(''):
    129             raise TypeError('Template.prepend: cmd must be a string')
    130         if kind not in stepkinds:
    131             raise ValueError('Template.prepend: bad kind %r' % (kind,))
    132         if kind == SINK:
    133             raise ValueError('Template.prepend: SINK can only be appended')
    134         if self.steps and self.steps[0][1] == SOURCE:
    135             raise ValueError('Template.prepend: already begins with SOURCE')
    136         if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
    137             raise ValueError('Template.prepend: missing $IN in cmd')
    138         if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
    139             raise ValueError('Template.prepend: missing $OUT in cmd')
    140         self.steps.insert(0, (cmd, kind))
    141 
    142     def open(self, file, rw):
    143         """t.open(file, rw) returns a pipe or file object open for
    144         reading or writing; the file is the other end of the pipeline."""
    145         if rw == 'r':
    146             return self.open_r(file)
    147         if rw == 'w':
    148             return self.open_w(file)
    149         raise ValueError('Template.open: rw must be \'r\' or \'w\', not %r'
    150                          % (rw,))
    151 
    152     def open_r(self, file):
    153         """t.open_r(file) and t.open_w(file) implement
    154         t.open(file, 'r') and t.open(file, 'w') respectively."""
    155         if not self.steps:
    156             return open(file, 'r')
    157         if self.steps[-1][1] == SINK:
    158             raise ValueError('Template.open_r: pipeline ends width SINK')
    159         cmd = self.makepipeline(file, '')
    160         return os.popen(cmd, 'r')
    161 
    162     def open_w(self, file):
    163         if not self.steps:
    164             return open(file, 'w')
    165         if self.steps[0][1] == SOURCE:
    166             raise ValueError('Template.open_w: pipeline begins with SOURCE')
    167         cmd = self.makepipeline('', file)
    168         return os.popen(cmd, 'w')
    169 
    170     def copy(self, infile, outfile):
    171         return os.system(self.makepipeline(infile, outfile))
    172 
    173     def makepipeline(self, infile, outfile):
    174         cmd = makepipeline(infile, self.steps, outfile)
    175         if self.debugging:
    176             print(cmd)
    177             cmd = 'set -x; ' + cmd
    178         return cmd
    179 
    180 
    181 def makepipeline(infile, steps, outfile):
    182     # Build a list with for each command:
    183     # [input filename or '', command string, kind, output filename or '']
    184 
    185     list = []
    186     for cmd, kind in steps:
    187         list.append(['', cmd, kind, ''])
    188     #
    189     # Make sure there is at least one step
    190     #
    191     if not list:
    192         list.append(['', 'cat', '--', ''])
    193     #
    194     # Take care of the input and output ends
    195     #
    196     [cmd, kind] = list[0][1:3]
    197     if kind[0] == 'f' and not infile:
    198         list.insert(0, ['', 'cat', '--', ''])
    199     list[0][0] = infile
    200     #
    201     [cmd, kind] = list[-1][1:3]
    202     if kind[1] == 'f' and not outfile:
    203         list.append(['', 'cat', '--', ''])
    204     list[-1][-1] = outfile
    205     #
    206     # Invent temporary files to connect stages that need files
    207     #
    208     garbage = []
    209     for i in range(1, len(list)):
    210         lkind = list[i-1][2]
    211         rkind = list[i][2]
    212         if lkind[1] == 'f' or rkind[0] == 'f':
    213             (fd, temp) = tempfile.mkstemp()
    214             os.close(fd)
    215             garbage.append(temp)
    216             list[i-1][-1] = list[i][0] = temp
    217     #
    218     for item in list:
    219         [inf, cmd, kind, outf] = item
    220         if kind[1] == 'f':
    221             cmd = 'OUT=' + quote(outf) + '; ' + cmd
    222         if kind[0] == 'f':
    223             cmd = 'IN=' + quote(inf) + '; ' + cmd
    224         if kind[0] == '-' and inf:
    225             cmd = cmd + ' <' + quote(inf)
    226         if kind[1] == '-' and outf:
    227             cmd = cmd + ' >' + quote(outf)
    228         item[1] = cmd
    229     #
    230     cmdlist = list[0][1]
    231     for item in list[1:]:
    232         [cmd, kind] = item[1:3]
    233         if item[0] == '':
    234             if 'f' in kind:
    235                 cmd = '{ ' + cmd + '; }'
    236             cmdlist = cmdlist + ' |\n' + cmd
    237         else:
    238             cmdlist = cmdlist + '\n' + cmd
    239     #
    240     if garbage:
    241         rmcmd = 'rm -f'
    242         for file in garbage:
    243             rmcmd = rmcmd + ' ' + quote(file)
    244         trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
    245         cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
    246     #
    247     return cmdlist
    248