"""A dumb and slow but simple dbm clone.

For database spam, spam.dir contains the index (a text file),
spam.bak *may* contain a backup of the index (also a text file),
while spam.dat contains the data (a binary file).

XXX TO DO:

- seems to contain a bug when updating...

- reclaim free space (currently, space once occupied by deleted or expanded
items is never reused)

- support concurrent access (currently, if two processes take turns making
updates, they can mess up the index)

- support efficient access to large databases (currently, the whole index
is read when the database is opened, and some updates rewrite the whole index)

- support opening for read-only (flag = 'm')

"""

# ast.literal_eval (used to parse the directory file) needs Python 2.6+.
import ast as _ast
import os as _os

try:
    import __builtin__
    import UserDict
except ImportError:
    # Python 3 spellings of the same two modules.
    import builtins as __builtin__
    from collections.abc import MutableMapping as _DictMixin
else:
    _DictMixin = UserDict.DictMixin

_open = __builtin__.open

_BLOCKSIZE = 512

# Padding byte used to align values in the data file.
_NUL = b'\0'

error = IOError                         # For anydbm


class _Database(_DictMixin):
    """Mapping of 8-bit string keys to 8-bit string values kept on disk.

    Values live in ``<filebasename>.dat``; the key -> (pos, siz) index lives
    in ``<filebasename>.dir`` and is mirrored in memory in ``self._index``.
    """

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _open = _open   # for _commit()

    def __init__(self, filebasename, mode):
        """Open (creating if necessary) the database named filebasename.

        mode is the permission mode applied with os.chmod() to the files
        this object creates or rewrites.
        """
        # The index is an in-memory dict, mirroring the directory file.
        # It is set before anything that can fail so that __del__ (which
        # calls _commit()) is a harmless no-op on a half-built object.
        self._index = None  # maps keys to (pos, siz) pairs

        self._mode = mode

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + _os.extsep + 'dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + _os.extsep + 'dat'
        self._bakfile = filebasename + _os.extsep + 'bak'

        # Mod by Jack: create data file if needed
        try:
            f = _open(self._datfile, 'r')
        except IOError:
            f = _open(self._datfile, 'w')
            try:
                self._chmod(self._datfile)
            finally:
                f.close()
        else:
            f.close()
        self._update()

    # Read directory file into the in-memory index dict.
    def _update(self):
        try:
            f = _open(self._dirfile)
        except IOError:
            # No readable directory file: start with an empty index.
            self._index = {}
            return

        # Parse into a local dict and publish it only on success.  If a
        # line is malformed the exception propagates with self._index left
        # untouched, so a later _commit() cannot overwrite the directory
        # file with a partially-read index.
        index = {}
        try:
            for line in f:
                line = line.rstrip()
                # literal_eval accepts only Python literals, so a tampered
                # directory file cannot execute code (eval() could).
                key, pos_and_siz_pair = _ast.literal_eval(line)
                index[key] = pos_and_siz_pair
        finally:
            f.close()
        self._index = index

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None:
            return  # nothing to do

        try:
            self._os.unlink(self._bakfile)
        except self._os.error:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except self._os.error:
            pass

        f = self._open(self._dirfile, 'w')
        try:
            self._chmod(self._dirfile)
            for key, pos_and_siz_pair in self._index.items():
                f.write("%r, %r\n" % (key, pos_and_siz_pair))
        finally:
            f.close()

    sync = _commit

    def __getitem__(self, key):
        """Return the value stored under key; raise KeyError if absent."""
        pos, siz = self._index[key]     # may raise KeyError
        f = _open(self._datfile, 'rb')
        try:
            f.seek(pos)
            return f.read(siz)
        finally:
            f.close()

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(0, 2)
            pos = int(f.tell())
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write(_NUL * (npos - pos))
            pos = npos
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(pos)
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pos_and_siz_pair):
        self._index[key] = pos_and_siz_pair
        f = _open(self._dirfile, 'a')
        try:
            self._chmod(self._dirfile)
            f.write("%r, %r\n" % (key, pos_and_siz_pair))
        finally:
            f.close()

    def __setitem__(self, key, val):
        """Store val under key; both must be 8-bit strings.

        Raises TypeError for any other key or value type.
        """
        # Exact type match on purpose (as before): the key is serialized
        # into the directory file with %r, so subclasses are not accepted.
        # `bytes` is the 8-bit string type (an alias of str on Python 2).
        if type(key) is not bytes or type(val) is not bytes:
            raise TypeError("keys and values must be strings")
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.  This also means that the on-disk directory and data
            # files are in a mutually inconsistent state, and they'll
            # remain that way until _commit() is called.  Note that this
            # is a disaster (for the database) if the program crashes
            # (so that _commit() never gets called).

    def __delitem__(self, key):
        """Remove key from the index; raise KeyError if absent."""
        # The blocks used by the associated value are lost.
        del self._index[key]
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        """Return a list of all keys in the index."""
        return list(self._index)

    def has_key(self, key):
        return key in self._index

    def __contains__(self, key):
        return key in self._index

    def iterkeys(self):
        return iter(self._index)
    __iter__ = iterkeys

    def __len__(self):
        return len(self._index)

    def close(self):
        """Write the index to disk and mark the object as closed."""
        # Like _commit(), this can run from __del__, so no globals here.
        try:
            self._commit()
        finally:
            # Reset even if the commit failed, so that __del__ does not
            # attempt the same failing commit a second time.
            self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close

    def _chmod(self, file):
        # os.chmod is looked up via self._os (not the global) because this
        # is called from _commit().
        if hasattr(self._os, 'chmod'):
            self._os.chmod(file, self._mode)


def open(file, flag=None, mode=0o666):
    """Open the database file, file, and return corresponding object.

    The flag argument, used to control how the database is opened in the
    other DBM implementations, is ignored in the dumbdbm module; the
    database is always opened for update, and will be created if it does
    not exist.

    The optional mode argument is the UNIX mode of the file, used only when
    the database has to be created.  It defaults to octal code 0666 (and
    will be modified by the prevailing umask).

    """
    # flag argument is currently ignored

    # Modify mode depending on the umask
    try:
        # umask() can only be read by setting it, so set and restore.
        um = _os.umask(0)
        _os.umask(um)
    except AttributeError:
        pass
    else:
        # Turn off any bits that are set in the umask
        mode = mode & (~um)

    return _Database(file, mode)