"""Conversion pipeline templates.

The problem:
------------

Suppose you have some data that you want to convert to another format,
such as from GIF image format to PPM image format.  Maybe the
conversion involves several steps (e.g. piping it through compress or
uuencode).  Some of the conversion steps may require that their input
is a disk file, others may be able to read standard input; similar for
their output.  The input to the entire conversion may also be read
from a disk file or from an open file, and similar for its output.

The module lets you construct a pipeline template by sticking one or
more conversion steps together.  It will take care of creating and
removing temporary files if they are necessary to hold intermediate
data.  You can then use the template to do conversions from many
different sources to many different destinations.  The temporary
file names used are different each time the template is used.

The templates are objects so you can create templates for many
different conversion steps and store them in a dictionary, for
instance.


Directions:
-----------

To create a template:
    t = Template()

To add a conversion step to a template:
    t.append(command, kind)
where kind is a string of two characters: the first is '-' if the
command reads its standard input or 'f' if it requires a file; the
second likewise for the output.  The command must be valid /bin/sh
syntax.  If input or output files are required, they are passed as
$IN and $OUT; otherwise, it must be possible to use the command in
a pipeline.

To add a conversion step at the beginning:
    t.prepend(command, kind)

To convert a file to another file using a template:
    sts = t.copy(infile, outfile)
If infile or outfile are the empty string, standard input is read or
standard output is written, respectively.  The return value is the
exit status of the conversion pipeline.

To open a file for reading or writing through a conversion pipeline:
    fp = t.open(file, mode)
where mode is 'r' to read the file, or 'w' to write it -- just like
for the built-in function open() or for os.popen().

To create a new template object initialized to a given one:
    t2 = t.clone()
"""                                     # '


import re
import os
import tempfile
# we import the quote function rather than the module for backward compat
# (quote used to be an undocumented but used function in pipes)
from shlex import quote

__all__ = ["Template"]

# Conversion step kinds

FILEIN_FILEOUT = 'ff'                   # Must read & write real files
STDIN_FILEOUT = '-f'                    # Must write a real file
FILEIN_STDOUT = 'f-'                    # Must read a real file
STDIN_STDOUT = '--'                     # Normal pipeline element
SOURCE = '.-'                           # Must be first, writes stdout
SINK = '-.'                             # Must be last, reads stdin

stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT,
             SOURCE, SINK]

# Patterns used to verify that a file-based step mentions its placeholder.
_IN_RE = re.compile(r'\$IN\b')
_OUT_RE = re.compile(r'\$OUT\b')


def _check_cmd_and_kind(where, cmd, kind):
    """Raise TypeError/ValueError unless cmd is a str and kind is known.

    *where* is the 'Class.method' prefix used in the error message.
    """
    if not isinstance(cmd, str):
        raise TypeError('%s: cmd must be a string' % (where,))
    if kind not in stepkinds:
        raise ValueError('%s: bad kind %r' % (where, kind))


def _check_placeholders(where, cmd, kind):
    """Raise ValueError if a file-based kind lacks $IN / $OUT in cmd.

    *where* is the 'Class.method' prefix used in the error message.
    """
    if kind[0] == 'f' and not _IN_RE.search(cmd):
        raise ValueError('%s: missing $IN in cmd' % (where,))
    if kind[1] == 'f' and not _OUT_RE.search(cmd):
        raise ValueError('%s: missing $OUT in cmd' % (where,))


class Template:
    """Class representing a pipeline template.

    Instance attributes:
      steps     -- list of (cmd, kind) tuples, in pipeline order
      debugging -- when true, generated pipelines are printed and run
                   with 'set -x'
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so later edits to one template don't affect the other.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is not a known step kind, if kind is SOURCE, if the template
        already ends with a SINK, or if kind requires a file but cmd does
        not mention $IN / $OUT.
        """
        _check_cmd_and_kind('Template.append', cmd, kind)
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        _check_placeholders('Template.append', cmd, kind)
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is not a known step kind, if kind is SINK, if the template
        already begins with a SOURCE, or if kind requires a file but cmd
        does not mention $IN / $OUT.
        """
        _check_cmd_and_kind('Template.prepend', cmd, kind)
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        _check_placeholders('Template.prepend', cmd, kind)
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; anything else raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError("Template.open: rw must be 'r' or 'w', not %r"
                         % (rw,))

    def open_r(self, file):
        """t.open_r(file) implements t.open(file, 'r').

        With no steps the file is opened directly; otherwise the pipeline
        is started with os.popen() reading from *file*.  Raises ValueError
        if the pipeline ends with a SINK (there would be nothing to read).
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is opened directly; otherwise the pipeline
        is started with os.popen() writing to *file*.  Raises ValueError
        if the pipeline begins with a SOURCE (it would not read our data).
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to outfile
        with os.system() and returns its result."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command text
        for this template.

        When debugging is on, the command is printed and then prefixed
        with 'set -x; ' so the shell traces its execution.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd


def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command text that runs *steps* as a pipeline.

    infile and outfile name the files at the two ends; an empty value means
    the pipeline's own standard input / output.  steps is a sequence of
    (cmd, kind) tuples.  Stages that need real files are connected through
    temporary files, which are created here with tempfile.mkstemp() and
    removed by the generated command itself (on completion or on a trapped
    signal).
    """
    # One mutable record per stage:
    #   [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]
    #
    # Make sure there is at least one step
    #
    if not stages:
        stages.append(['', 'cat', '--', ''])
    #
    # Take care of the input and output ends: a stage that needs a real
    # file but has none gets a 'cat' neighbour to bridge to stdin/stdout.
    #
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile
    #
    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile
    #
    # Invent temporary files to connect stages that need files
    #
    garbage = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            fd, temp = tempfile.mkstemp()
            os.close(fd)
            garbage.append(temp)
            left[-1] = right[0] = temp
    #
    # Bind $IN/$OUT or add shell redirections for every stage
    #
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd
    #
    # Join the stages: '|' where data flows through a pipe, a plain
    # newline where the previous stage wrote a file this stage reads.
    #
    cmdlist = stages[0][1]
    for stage in stages[1:]:
        cmd, kind = stage[1:3]
        if stage[0] == '':
            if 'f' in kind:
                # Group 'VAR=...; cmd' so the whole thing is one pipe element.
                cmd = '{ ' + cmd + '; }'
            cmdlist = cmdlist + ' |\n' + cmd
        else:
            cmdlist = cmdlist + '\n' + cmd
    #
    # Remove the temporary files afterwards, and also on common signals
    #
    if garbage:
        rmcmd = 'rm -f' + ''.join(' ' + quote(name) for name in garbage)
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
    #
    return cmdlist