1 """Manage shelves of pickled objects. 2 3 A "shelf" is a persistent, dictionary-like object. The difference 4 with dbm databases is that the values (not the keys!) in a shelf can 5 be essentially arbitrary Python objects -- anything that the "pickle" 6 module can handle. This includes most class instances, recursive data 7 types, and objects containing lots of shared sub-objects. The keys 8 are ordinary strings. 9 10 To summarize the interface (key is a string, data is an arbitrary 11 object): 12 13 import shelve 14 d = shelve.open(filename) # open, with (g)dbm filename -- no suffix 15 16 d[key] = data # store data at key (overwrites old data if 17 # using an existing key) 18 data = d[key] # retrieve a COPY of the data at key (raise 19 # KeyError if no such key) -- NOTE that this 20 # access returns a *copy* of the entry! 21 del d[key] # delete data stored at key (raises KeyError 22 # if no such key) 23 flag = d.has_key(key) # true if the key exists; same as "key in d" 24 list = d.keys() # a list of all existing keys (slow!) 25 26 d.close() # close it 27 28 Dependent on the implementation, closing a persistent dictionary may 29 or may not be necessary to flush changes to disk. 30 31 Normally, d[key] returns a COPY of the entry. This needs care when 32 mutable entries are mutated: for example, if d[key] is a list, 33 d[key].append(anitem) 34 does NOT modify the entry d[key] itself, as stored in the persistent 35 mapping -- it only modifies the copy, which is then immediately 36 discarded, so that the append has NO effect whatsoever. To append an 37 item to d[key] in a way that will affect the persistent mapping, use: 38 data = d[key] 39 data.append(anitem) 40 d[key] = data 41 42 To avoid the problem with mutable entries, you may pass the keyword 43 argument writeback=True in the call to shelve.open. When you use: 44 d = shelve.open(filename, writeback=True) 45 then d keeps a cache of all entries you access, and writes them all back 46 to the persistent mapping when you call d.close(). This ensures that 47 such usage as d[key].append(anitem) works as intended. 48 49 However, using keyword argument writeback=True may consume vast amount 50 of memory for the cache, and it may make d.close() very slow, if you 51 access many of d's entries after opening it in this way: d has no way to 52 check which of the entries you access are mutable and/or which ones you 53 actually mutate, so it must cache, and write back at close, all of the 54 entries that you access. You can call d.sync() to write back all the 55 entries in the cache, and empty the cache (d.sync() also synchronizes 56 the persistent dictionary on disk, if feasible). 57 """ 58 59 # Try using cPickle and cStringIO if available. 60 61 try: 62 from cPickle import Pickler, Unpickler 63 except ImportError: 64 from pickle import Pickler, Unpickler 65 66 try: 67 from cStringIO import StringIO 68 except ImportError: 69 from StringIO import StringIO 70 71 import UserDict 72 73 __all__ = ["Shelf","BsdDbShelf","DbfilenameShelf","open"] 74 75 class _ClosedDict(UserDict.DictMixin): 76 'Marker for a closed dict. Access attempts raise a ValueError.' 77 78 def closed(self, *args): 79 raise ValueError('invalid operation on closed shelf') 80 __getitem__ = __setitem__ = __delitem__ = keys = closed 81 82 def __repr__(self): 83 return '<Closed Dictionary>' 84 85 class Shelf(UserDict.DictMixin): 86 """Base class for shelf implementations. 87 88 This is initialized with a dictionary-like object. 89 See the module's __doc__ string for an overview of the interface. 90 """ 91 92 def __init__(self, dict, protocol=None, writeback=False): 93 self.dict = dict 94 if protocol is None: 95 protocol = 0 96 self._protocol = protocol 97 self.writeback = writeback 98 self.cache = {} 99 100 def keys(self): 101 return self.dict.keys() 102 103 def __len__(self): 104 return len(self.dict) 105 106 def has_key(self, key): 107 return key in self.dict 108 109 def __contains__(self, key): 110 return key in self.dict 111 112 def get(self, key, default=None): 113 if key in self.dict: 114 return self[key] 115 return default 116 117 def __getitem__(self, key): 118 try: 119 value = self.cache[key] 120 except KeyError: 121 f = StringIO(self.dict[key]) 122 value = Unpickler(f).load() 123 if self.writeback: 124 self.cache[key] = value 125 return value 126 127 def __setitem__(self, key, value): 128 if self.writeback: 129 self.cache[key] = value 130 f = StringIO() 131 p = Pickler(f, self._protocol) 132 p.dump(value) 133 self.dict[key] = f.getvalue() 134 135 def __delitem__(self, key): 136 del self.dict[key] 137 try: 138 del self.cache[key] 139 except KeyError: 140 pass 141 142 def close(self): 143 self.sync() 144 try: 145 self.dict.close() 146 except AttributeError: 147 pass 148 # Catch errors that may happen when close is called from __del__ 149 # because CPython is in interpreter shutdown. 150 try: 151 self.dict = _ClosedDict() 152 except (NameError, TypeError): 153 self.dict = None 154 155 def __del__(self): 156 if not hasattr(self, 'writeback'): 157 # __init__ didn't succeed, so don't bother closing 158 return 159 self.close() 160 161 def sync(self): 162 if self.writeback and self.cache: 163 self.writeback = False 164 for key, entry in self.cache.iteritems(): 165 self[key] = entry 166 self.writeback = True 167 self.cache = {} 168 if hasattr(self.dict, 'sync'): 169 self.dict.sync() 170 171 172 class BsdDbShelf(Shelf): 173 """Shelf implementation using the "BSD" db interface. 174 175 This adds methods first(), next(), previous(), last() and 176 set_location() that have no counterpart in [g]dbm databases. 177 178 The actual database must be opened using one of the "bsddb" 179 modules "open" routines (i.e. bsddb.hashopen, bsddb.btopen or 180 bsddb.rnopen) and passed to the constructor. 181 182 See the module's __doc__ string for an overview of the interface. 183 """ 184 185 def __init__(self, dict, protocol=None, writeback=False): 186 Shelf.__init__(self, dict, protocol, writeback) 187 188 def set_location(self, key): 189 (key, value) = self.dict.set_location(key) 190 f = StringIO(value) 191 return (key, Unpickler(f).load()) 192 193 def next(self): 194 (key, value) = self.dict.next() 195 f = StringIO(value) 196 return (key, Unpickler(f).load()) 197 198 def previous(self): 199 (key, value) = self.dict.previous() 200 f = StringIO(value) 201 return (key, Unpickler(f).load()) 202 203 def first(self): 204 (key, value) = self.dict.first() 205 f = StringIO(value) 206 return (key, Unpickler(f).load()) 207 208 def last(self): 209 (key, value) = self.dict.last() 210 f = StringIO(value) 211 return (key, Unpickler(f).load()) 212 213 214 class DbfilenameShelf(Shelf): 215 """Shelf implementation using the "anydbm" generic dbm interface. 216 217 This is initialized with the filename for the dbm database. 218 See the module's __doc__ string for an overview of the interface. 219 """ 220 221 def __init__(self, filename, flag='c', protocol=None, writeback=False): 222 import anydbm 223 Shelf.__init__(self, anydbm.open(filename, flag), protocol, writeback) 224 225 226 def open(filename, flag='c', protocol=None, writeback=False): 227 """Open a persistent dictionary for reading and writing. 228 229 The filename parameter is the base filename for the underlying 230 database. As a side-effect, an extension may be added to the 231 filename and more than one file may be created. The optional flag 232 parameter has the same interpretation as the flag parameter of 233 anydbm.open(). The optional protocol parameter specifies the 234 version of the pickle protocol (0, 1, or 2). 235 236 See the module's __doc__ string for an overview of the interface. 237 """ 238 239 return DbfilenameShelf(filename, flag, protocol, writeback) 240