"""Conversion pipeline templates.

The problem:
------------

Suppose you have some data that you want to convert to another format,
such as from GIF image format to PPM image format.  Maybe the
conversion involves several steps (e.g. piping it through compress or
uuencode).  Some of the conversion steps may require that their input
is a disk file, others may be able to read standard input; similar for
their output.  The input to the entire conversion may also be read
from a disk file or from an open file, and similar for its output.

The module lets you construct a pipeline template by sticking one or
more conversion steps together.  It will take care of creating and
removing temporary files if they are necessary to hold intermediate
data.  You can then use the template to do conversions from many
different sources to many different destinations.  The temporary
file names used are different each time the template is used.

The templates are objects so you can create templates for many
different conversion steps and store them in a dictionary, for
instance.


Directions:
-----------

To create a template:
    t = Template()

To add a conversion step to a template:
    t.append(command, kind)
where kind is a string of two characters: the first is '-' if the
command reads its standard input or 'f' if it requires a file; the
second likewise for the output.  The command must be valid /bin/sh
syntax.  If input or output files are required, they are passed as
$IN and $OUT; otherwise, it must be possible to use the command in
a pipeline.

To add a conversion step at the beginning:
    t.prepend(command, kind)

To convert a file to another file using a template:
    sts = t.copy(infile, outfile)
If infile or outfile are the empty string, standard input is read or
standard output is written, respectively.  The return value is the
exit status of the conversion pipeline.

To open a file for reading or writing through a conversion pipeline:
    fp = t.open(file, mode)
where mode is 'r' to read the file, or 'w' to write it -- just like
for the built-in function open() or for os.popen().

To create a new template object initialized to a given one:
    t2 = t.clone()
"""


import re
import os
import tempfile
import string

__all__ = ["Template"]

# Conversion step kinds

FILEIN_FILEOUT = 'ff'                   # Must read & write real files
STDIN_FILEOUT = '-f'                    # Must write a real file
FILEIN_STDOUT = 'f-'                    # Must read a real file
STDIN_STDOUT = '--'                     # Normal pipeline element
SOURCE = '.-'                           # Must be first, writes stdout
SINK = '-.'                             # Must be last, reads stdin

stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT,
             SOURCE, SINK]


def _check_step(where, cmd, kind):
    """Check that cmd is a string and kind is one of stepkinds.

    where is the name of the calling Template method ('append' or
    'prepend'); it is only used to prefix the error message.
    Raises TypeError for a non-string cmd, ValueError for a bad kind.
    """
    if not isinstance(cmd, str):
        raise TypeError('Template.%s: cmd must be a string' % where)
    if kind not in stepkinds:
        raise ValueError('Template.%s: bad kind %r' % (where, kind))


def _check_placeholders(where, cmd, kind):
    """Check that cmd mentions $IN / $OUT when kind requires real files.

    Raises ValueError if a required placeholder is missing from cmd.
    """
    if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
        raise ValueError('Template.%s: missing $IN in cmd' % where)
    if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
        raise ValueError('Template.%s: missing $OUT in cmd' % where)


class Template:
    """Class representing a pipeline template."""

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so later edits to either template stay independent.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is not a valid step kind, if kind is SOURCE, if the template
        already ends with a SINK step, or if cmd lacks a $IN / $OUT
        placeholder that kind requires.
        """
        _check_step('append', cmd, kind)
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        _check_placeholders('append', cmd, kind)
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is not a valid step kind, if kind is SINK, if the template
        already begins with a SOURCE step, or if cmd lacks a $IN / $OUT
        placeholder that kind requires.
        """
        _check_step('prepend', cmd, kind)
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        _check_placeholders('prepend', cmd, kind)
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline."""
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError(
            "Template.open: rw must be 'r' or 'w', not %r" % (rw,))

    def open_r(self, file):
        """t.open_r(file) implements t.open(file, 'r').

        With no steps the file is opened directly; otherwise the
        pipeline reads from file and its standard output is returned
        as a pipe.  Raises ValueError if the pipeline ends with SINK.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is opened directly; otherwise a pipe
        feeding the pipeline's standard input is returned and the
        pipeline writes to file.  Raises ValueError if the pipeline
        begins with SOURCE.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to
        outfile via os.system() and returns its result."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the /bin/sh command
        string for this template.  When debugging is on, the command is
        printed and prefixed with 'set -x; ' so the shell traces it."""
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd


def makepipeline(infile, steps, outfile):
    """Return a /bin/sh command string running steps from infile to outfile.

    steps is a sequence of (cmd, kind) pairs.  An empty infile or
    outfile means no redirection is added at that end.  Temporary files
    are created here (with tempfile.mkstemp) for adjacent stages that
    need a real file between them; the returned command removes them
    when it finishes or is interrupted by a trapped signal.
    """
    # Build a list with for each command:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]
    #
    # Make sure there is at least one step
    #
    if not stages:
        stages.append(['', 'cat', '--', ''])
    #
    # Take care of the input and output ends: a stage that needs a real
    # file but has none gets a 'cat' stage placed next to it.
    #
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile
    #
    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile
    #
    # Invent temporary files to connect stages that need files
    #
    garbage = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            (fd, temp) = tempfile.mkstemp()
            # Only the name is needed; the shell commands open the file.
            os.close(fd)
            garbage.append(temp)
            left[-1] = right[0] = temp
    #
    # Decorate each command with $IN/$OUT assignments or redirections
    #
    for item in stages:
        [inf, cmd, kind, outf] = item
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        item[1] = cmd
    #
    # Join the stages: a pipe when the stage reads its predecessor's
    # stdout, a plain newline when they are connected through a file.
    #
    cmdlist = stages[0][1]
    for item in stages[1:]:
        [cmd, kind] = item[1:3]
        if item[0] == '':
            if 'f' in kind:
                # Group so the variable assignment and the command form
                # a single pipeline element.
                cmd = '{ ' + cmd + '; }'
            cmdlist = cmdlist + ' |\n' + cmd
        else:
            cmdlist = cmdlist + '\n' + cmd
    #
    if garbage:
        rmcmd = ' '.join(['rm -f'] + [quote(temp) for temp in garbage])
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
    #
    return cmdlist


# Reliably quote a string as a single argument for /bin/sh

# Safe unquoted
_safechars = frozenset(string.ascii_letters + string.digits + '@%_-+=:,./')


def quote(file):
    """Return a shell-escaped version of the file string."""
    if not file:
        return "''"
    if all(c in _safechars for c in file):
        return file
    # use single quotes, and put single quotes into double quotes
    # the string $'b is then quoted as '$'"'"'b'
    return "'" + file.replace("'", "'\"'\"'") + "'"