Home | History | Annotate | Download | only in python2.7
      1 """Conversion pipeline templates.
      2 
      3 The problem:
      4 ------------
      5 
      6 Suppose you have some data that you want to convert to another format,
      7 such as from GIF image format to PPM image format.  Maybe the
      8 conversion involves several steps (e.g. piping it through compress or
      9 uuencode).  Some of the conversion steps may require that their input
     10 is a disk file, others may be able to read standard input; similar for
     11 their output.  The input to the entire conversion may also be read
     12 from a disk file or from an open file, and similar for its output.
     13 
     14 The module lets you construct a pipeline template by sticking one or
     15 more conversion steps together.  It will take care of creating and
     16 removing temporary files if they are necessary to hold intermediate
     17 data.  You can then use the template to do conversions from many
     18 different sources to many different destinations.  The temporary
     19 file names used are different each time the template is used.
     20 
     21 The templates are objects so you can create templates for many
     22 different conversion steps and store them in a dictionary, for
     23 instance.
     24 
     25 
     26 Directions:
     27 -----------
     28 
     29 To create a template:
     30     t = Template()
     31 
     32 To add a conversion step to a template:
     33    t.append(command, kind)
     34 where kind is a string of two characters: the first is '-' if the
     35 command reads its standard input or 'f' if it requires a file; the
     36 second likewise for the output. The command must be valid /bin/sh
     37 syntax.  If input or output files are required, they are passed as
     38 $IN and $OUT; otherwise, it must be possible to use the command in
     39 a pipeline.
     40 
     41 To add a conversion step at the beginning:
     42    t.prepend(command, kind)
     43 
     44 To convert a file to another file using a template:
     45   sts = t.copy(infile, outfile)
     46 If infile or outfile is the empty string, standard input is read or
     47 standard output is written, respectively.  The return value is the
     48 exit status of the conversion pipeline.
     49 
     50 To open a file for reading or writing through a conversion pipeline:
     51    fp = t.open(file, mode)
     52 where mode is 'r' to read the file, or 'w' to write it -- just like
     53 for the built-in function open() or for os.popen().
     54 
     55 To create a new template object initialized to a given one:
     56    t2 = t.clone()
     57 """                                     # '
     58 
     59 
     60 import re
     61 import os
     62 import tempfile
     63 import string
     64 
     65 __all__ = ["Template"]
     66 
     67 # Conversion step kinds
     68 
     69 FILEIN_FILEOUT = 'ff'                   # Must read & write real files
     70 STDIN_FILEOUT  = '-f'                   # Must write a real file
     71 FILEIN_STDOUT  = 'f-'                   # Must read a real file
     72 STDIN_STDOUT   = '--'                   # Normal pipeline element
     73 SOURCE         = '.-'                   # Must be first, writes stdout
     74 SINK           = '-.'                   # Must be last, reads stdin
     75 
     76 stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT, \
     77              SOURCE, SINK]
     78 
     79 
     80 class Template:
     81     """Class representing a pipeline template."""
     82 
     83     def __init__(self):
     84         """Template() returns a fresh pipeline template."""
     85         self.debugging = 0
     86         self.reset()
     87 
     88     def __repr__(self):
     89         """t.__repr__() implements repr(t)."""
     90         return '<Template instance, steps=%r>' % (self.steps,)
     91 
     92     def reset(self):
     93         """t.reset() restores a pipeline template to its initial state."""
     94         self.steps = []
     95 
     96     def clone(self):
     97         """t.clone() returns a new pipeline template with identical
     98         initial state as the current one."""
     99         t = Template()
    100         t.steps = self.steps[:]
    101         t.debugging = self.debugging
    102         return t
    103 
    104     def debug(self, flag):
    105         """t.debug(flag) turns debugging on or off."""
    106         self.debugging = flag
    107 
    108     def append(self, cmd, kind):
    109         """t.append(cmd, kind) adds a new step at the end."""
    110         if type(cmd) is not type(''):
    111             raise TypeError, \
    112                   'Template.append: cmd must be a string'
    113         if kind not in stepkinds:
    114             raise ValueError, \
    115                   'Template.append: bad kind %r' % (kind,)
    116         if kind == SOURCE:
    117             raise ValueError, \
    118                   'Template.append: SOURCE can only be prepended'
    119         if self.steps and self.steps[-1][1] == SINK:
    120             raise ValueError, \
    121                   'Template.append: already ends with SINK'
    122         if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
    123             raise ValueError, \
    124                   'Template.append: missing $IN in cmd'
    125         if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
    126             raise ValueError, \
    127                   'Template.append: missing $OUT in cmd'
    128         self.steps.append((cmd, kind))
    129 
    130     def prepend(self, cmd, kind):
    131         """t.prepend(cmd, kind) adds a new step at the front."""
    132         if type(cmd) is not type(''):
    133             raise TypeError, \
    134                   'Template.prepend: cmd must be a string'
    135         if kind not in stepkinds:
    136             raise ValueError, \
    137                   'Template.prepend: bad kind %r' % (kind,)
    138         if kind == SINK:
    139             raise ValueError, \
    140                   'Template.prepend: SINK can only be appended'
    141         if self.steps and self.steps[0][1] == SOURCE:
    142             raise ValueError, \
    143                   'Template.prepend: already begins with SOURCE'
    144         if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
    145             raise ValueError, \
    146                   'Template.prepend: missing $IN in cmd'
    147         if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
    148             raise ValueError, \
    149                   'Template.prepend: missing $OUT in cmd'
    150         self.steps.insert(0, (cmd, kind))
    151 
    152     def open(self, file, rw):
    153         """t.open(file, rw) returns a pipe or file object open for
    154         reading or writing; the file is the other end of the pipeline."""
    155         if rw == 'r':
    156             return self.open_r(file)
    157         if rw == 'w':
    158             return self.open_w(file)
    159         raise ValueError, \
    160               'Template.open: rw must be \'r\' or \'w\', not %r' % (rw,)
    161 
    162     def open_r(self, file):
    163         """t.open_r(file) and t.open_w(file) implement
    164         t.open(file, 'r') and t.open(file, 'w') respectively."""
    165         if not self.steps:
    166             return open(file, 'r')
    167         if self.steps[-1][1] == SINK:
    168             raise ValueError, \
    169                   'Template.open_r: pipeline ends width SINK'
    170         cmd = self.makepipeline(file, '')
    171         return os.popen(cmd, 'r')
    172 
    173     def open_w(self, file):
    174         if not self.steps:
    175             return open(file, 'w')
    176         if self.steps[0][1] == SOURCE:
    177             raise ValueError, \
    178                   'Template.open_w: pipeline begins with SOURCE'
    179         cmd = self.makepipeline('', file)
    180         return os.popen(cmd, 'w')
    181 
    182     def copy(self, infile, outfile):
    183         return os.system(self.makepipeline(infile, outfile))
    184 
    185     def makepipeline(self, infile, outfile):
    186         cmd = makepipeline(infile, self.steps, outfile)
    187         if self.debugging:
    188             print cmd
    189             cmd = 'set -x; ' + cmd
    190         return cmd
    191 
    192 
    193 def makepipeline(infile, steps, outfile):
    194     # Build a list with for each command:
    195     # [input filename or '', command string, kind, output filename or '']
    196 
    197     list = []
    198     for cmd, kind in steps:
    199         list.append(['', cmd, kind, ''])
    200     #
    201     # Make sure there is at least one step
    202     #
    203     if not list:
    204         list.append(['', 'cat', '--', ''])
    205     #
    206     # Take care of the input and output ends
    207     #
    208     [cmd, kind] = list[0][1:3]
    209     if kind[0] == 'f' and not infile:
    210         list.insert(0, ['', 'cat', '--', ''])
    211     list[0][0] = infile
    212     #
    213     [cmd, kind] = list[-1][1:3]
    214     if kind[1] == 'f' and not outfile:
    215         list.append(['', 'cat', '--', ''])
    216     list[-1][-1] = outfile
    217     #
    218     # Invent temporary files to connect stages that need files
    219     #
    220     garbage = []
    221     for i in range(1, len(list)):
    222         lkind = list[i-1][2]
    223         rkind = list[i][2]
    224         if lkind[1] == 'f' or rkind[0] == 'f':
    225             (fd, temp) = tempfile.mkstemp()
    226             os.close(fd)
    227             garbage.append(temp)
    228             list[i-1][-1] = list[i][0] = temp
    229     #
    230     for item in list:
    231         [inf, cmd, kind, outf] = item
    232         if kind[1] == 'f':
    233             cmd = 'OUT=' + quote(outf) + '; ' + cmd
    234         if kind[0] == 'f':
    235             cmd = 'IN=' + quote(inf) + '; ' + cmd
    236         if kind[0] == '-' and inf:
    237             cmd = cmd + ' <' + quote(inf)
    238         if kind[1] == '-' and outf:
    239             cmd = cmd + ' >' + quote(outf)
    240         item[1] = cmd
    241     #
    242     cmdlist = list[0][1]
    243     for item in list[1:]:
    244         [cmd, kind] = item[1:3]
    245         if item[0] == '':
    246             if 'f' in kind:
    247                 cmd = '{ ' + cmd + '; }'
    248             cmdlist = cmdlist + ' |\n' + cmd
    249         else:
    250             cmdlist = cmdlist + '\n' + cmd
    251     #
    252     if garbage:
    253         rmcmd = 'rm -f'
    254         for file in garbage:
    255             rmcmd = rmcmd + ' ' + quote(file)
    256         trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
    257         cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
    258     #
    259     return cmdlist
    260 
    261 
    262 # Reliably quote a string as a single argument for /bin/sh
    263 
    264 # Safe unquoted
    265 _safechars = frozenset(string.ascii_letters + string.digits + '@%_-+=:,./')
    266 
    267 def quote(file):
    268     """Return a shell-escaped version of the file string."""
    269     for c in file:
    270         if c not in _safechars:
    271             break
    272     else:
    273         if not file:
    274             return "''"
    275         return file
    276     # use single quotes, and put single quotes into double quotes
    277     # the string $'b is then quoted as '$'"'"'b'
    278     return "'" + file.replace("'", "'\"'\"'") + "'"
    279