Home | History | Annotate | Download | only in Lib
      1 """Conversion pipeline templates.
      2 
      3 The problem:
      4 ------------
      5 
      6 Suppose you have some data that you want to convert to another format,
      7 such as from GIF image format to PPM image format.  Maybe the
      8 conversion involves several steps (e.g. piping it through compress or
      9 uuencode).  Some of the conversion steps may require that their input
     10 is a disk file, others may be able to read standard input; similar for
     11 their output.  The input to the entire conversion may also be read
     12 from a disk file or from an open file, and similar for its output.
     13 
     14 The module lets you construct a pipeline template by sticking one or
     15 more conversion steps together.  It will take care of creating and
     16 removing temporary files if they are necessary to hold intermediate
     17 data.  You can then use the template to do conversions from many
     18 different sources to many different destinations.  The temporary
     19 file names used are different each time the template is used.
     20 
     21 The templates are objects so you can create templates for many
     22 different conversion steps and store them in a dictionary, for
     23 instance.
     24 
     25 
     26 Directions:
     27 -----------
     28 
     29 To create a template:
     30     t = Template()
     31 
     32 To add a conversion step to a template:
     33    t.append(command, kind)
     34 where kind is a string of two characters: the first is '-' if the
     35 command reads its standard input or 'f' if it requires a file; the
     36 second likewise for the output. The command must be valid /bin/sh
     37 syntax.  If input or output files are required, they are passed as
     38 $IN and $OUT; otherwise, it must be  possible to use the command in
     39 a pipeline.
     40 
     41 To add a conversion step at the beginning:
     42    t.prepend(command, kind)
     43 
     44 To convert a file to another file using a template:
     45   sts = t.copy(infile, outfile)
     46 If infile or outfile are the empty string, standard input is read or
     47 standard output is written, respectively.  The return value is the
     48 exit status of the conversion pipeline.
     49 
     50 To open a file for reading or writing through a conversion pipeline:
     51    fp = t.open(file, mode)
     52 where mode is 'r' to read the file, or 'w' to write it -- just like
     53 for the built-in function open() or for os.popen().
     54 
     55 To create a new template object initialized to a given one:
     56    t2 = t.clone()
     57 
     58 For an example, see the function test() at the end of the file.
     59 """                                     # '

     60 
     61 
     62 import re
     63 import os
     64 import tempfile
     65 import string
     66 
     67 __all__ = ["Template"]
     68 
     69 # Conversion step kinds

     70 
     71 FILEIN_FILEOUT = 'ff'                   # Must read & write real files

     72 STDIN_FILEOUT  = '-f'                   # Must write a real file

     73 FILEIN_STDOUT  = 'f-'                   # Must read a real file

     74 STDIN_STDOUT   = '--'                   # Normal pipeline element

     75 SOURCE         = '.-'                   # Must be first, writes stdout

     76 SINK           = '-.'                   # Must be last, reads stdin

     77 
     78 stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT, \
     79              SOURCE, SINK]
     80 
     81 
     82 class Template:
     83     """Class representing a pipeline template."""
     84 
     85     def __init__(self):
     86         """Template() returns a fresh pipeline template."""
     87         self.debugging = 0
     88         self.reset()
     89 
     90     def __repr__(self):
     91         """t.__repr__() implements repr(t)."""
     92         return '<Template instance, steps=%r>' % (self.steps,)
     93 
     94     def reset(self):
     95         """t.reset() restores a pipeline template to its initial state."""
     96         self.steps = []
     97 
     98     def clone(self):
     99         """t.clone() returns a new pipeline template with identical
    100         initial state as the current one."""
    101         t = Template()
    102         t.steps = self.steps[:]
    103         t.debugging = self.debugging
    104         return t
    105 
    106     def debug(self, flag):
    107         """t.debug(flag) turns debugging on or off."""
    108         self.debugging = flag
    109 
    110     def append(self, cmd, kind):
    111         """t.append(cmd, kind) adds a new step at the end."""
    112         if type(cmd) is not type(''):
    113             raise TypeError, \
    114                   'Template.append: cmd must be a string'
    115         if kind not in stepkinds:
    116             raise ValueError, \
    117                   'Template.append: bad kind %r' % (kind,)
    118         if kind == SOURCE:
    119             raise ValueError, \
    120                   'Template.append: SOURCE can only be prepended'
    121         if self.steps and self.steps[-1][1] == SINK:
    122             raise ValueError, \
    123                   'Template.append: already ends with SINK'
    124         if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
    125             raise ValueError, \
    126                   'Template.append: missing $IN in cmd'
    127         if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
    128             raise ValueError, \
    129                   'Template.append: missing $OUT in cmd'
    130         self.steps.append((cmd, kind))
    131 
    132     def prepend(self, cmd, kind):
    133         """t.prepend(cmd, kind) adds a new step at the front."""
    134         if type(cmd) is not type(''):
    135             raise TypeError, \
    136                   'Template.prepend: cmd must be a string'
    137         if kind not in stepkinds:
    138             raise ValueError, \
    139                   'Template.prepend: bad kind %r' % (kind,)
    140         if kind == SINK:
    141             raise ValueError, \
    142                   'Template.prepend: SINK can only be appended'
    143         if self.steps and self.steps[0][1] == SOURCE:
    144             raise ValueError, \
    145                   'Template.prepend: already begins with SOURCE'
    146         if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
    147             raise ValueError, \
    148                   'Template.prepend: missing $IN in cmd'
    149         if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
    150             raise ValueError, \
    151                   'Template.prepend: missing $OUT in cmd'
    152         self.steps.insert(0, (cmd, kind))
    153 
    154     def open(self, file, rw):
    155         """t.open(file, rw) returns a pipe or file object open for
    156         reading or writing; the file is the other end of the pipeline."""
    157         if rw == 'r':
    158             return self.open_r(file)
    159         if rw == 'w':
    160             return self.open_w(file)
    161         raise ValueError, \
    162               'Template.open: rw must be \'r\' or \'w\', not %r' % (rw,)
    163 
    164     def open_r(self, file):
    165         """t.open_r(file) and t.open_w(file) implement
    166         t.open(file, 'r') and t.open(file, 'w') respectively."""
    167         if not self.steps:
    168             return open(file, 'r')
    169         if self.steps[-1][1] == SINK:
    170             raise ValueError, \
    171                   'Template.open_r: pipeline ends width SINK'
    172         cmd = self.makepipeline(file, '')
    173         return os.popen(cmd, 'r')
    174 
    175     def open_w(self, file):
    176         if not self.steps:
    177             return open(file, 'w')
    178         if self.steps[0][1] == SOURCE:
    179             raise ValueError, \
    180                   'Template.open_w: pipeline begins with SOURCE'
    181         cmd = self.makepipeline('', file)
    182         return os.popen(cmd, 'w')
    183 
    184     def copy(self, infile, outfile):
    185         return os.system(self.makepipeline(infile, outfile))
    186 
    187     def makepipeline(self, infile, outfile):
    188         cmd = makepipeline(infile, self.steps, outfile)
    189         if self.debugging:
    190             print cmd
    191             cmd = 'set -x; ' + cmd
    192         return cmd
    193 
    194 
    195 def makepipeline(infile, steps, outfile):
    196     # Build a list with for each command:

    197     # [input filename or '', command string, kind, output filename or '']

    198 
    199     list = []
    200     for cmd, kind in steps:
    201         list.append(['', cmd, kind, ''])
    202     #

    203     # Make sure there is at least one step

    204     #

    205     if not list:
    206         list.append(['', 'cat', '--', ''])
    207     #

    208     # Take care of the input and output ends

    209     #

    210     [cmd, kind] = list[0][1:3]
    211     if kind[0] == 'f' and not infile:
    212         list.insert(0, ['', 'cat', '--', ''])
    213     list[0][0] = infile
    214     #

    215     [cmd, kind] = list[-1][1:3]
    216     if kind[1] == 'f' and not outfile:
    217         list.append(['', 'cat', '--', ''])
    218     list[-1][-1] = outfile
    219     #

    220     # Invent temporary files to connect stages that need files

    221     #

    222     garbage = []
    223     for i in range(1, len(list)):
    224         lkind = list[i-1][2]
    225         rkind = list[i][2]
    226         if lkind[1] == 'f' or rkind[0] == 'f':
    227             (fd, temp) = tempfile.mkstemp()
    228             os.close(fd)
    229             garbage.append(temp)
    230             list[i-1][-1] = list[i][0] = temp
    231     #

    232     for item in list:
    233         [inf, cmd, kind, outf] = item
    234         if kind[1] == 'f':
    235             cmd = 'OUT=' + quote(outf) + '; ' + cmd
    236         if kind[0] == 'f':
    237             cmd = 'IN=' + quote(inf) + '; ' + cmd
    238         if kind[0] == '-' and inf:
    239             cmd = cmd + ' <' + quote(inf)
    240         if kind[1] == '-' and outf:
    241             cmd = cmd + ' >' + quote(outf)
    242         item[1] = cmd
    243     #

    244     cmdlist = list[0][1]
    245     for item in list[1:]:
    246         [cmd, kind] = item[1:3]
    247         if item[0] == '':
    248             if 'f' in kind:
    249                 cmd = '{ ' + cmd + '; }'
    250             cmdlist = cmdlist + ' |\n' + cmd
    251         else:
    252             cmdlist = cmdlist + '\n' + cmd
    253     #

    254     if garbage:
    255         rmcmd = 'rm -f'
    256         for file in garbage:
    257             rmcmd = rmcmd + ' ' + quote(file)
    258         trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
    259         cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
    260     #

    261     return cmdlist
    262 
    263 
    264 # Reliably quote a string as a single argument for /bin/sh

    265 
    266 # Safe unquoted

    267 _safechars = frozenset(string.ascii_letters + string.digits + '@%_-+=:,./')
    268 
    269 def quote(file):
    270     """Return a shell-escaped version of the file string."""
    271     for c in file:
    272         if c not in _safechars:
    273             break
    274     else:
    275         if not file:
    276             return "''"
    277         return file
    278     # use single quotes, and put single quotes into double quotes

    279     # the string $'b is then quoted as '$'"'"'b'

    280     return "'" + file.replace("'", "'\"'\"'") + "'"
    281