Home | History | Annotate | Download | only in Lib
      1 """Manage shelves of pickled objects.
      2 
      3 A "shelf" is a persistent, dictionary-like object.  The difference
      4 with dbm databases is that the values (not the keys!) in a shelf can
      5 be essentially arbitrary Python objects -- anything that the "pickle"
      6 module can handle.  This includes most class instances, recursive data
      7 types, and objects containing lots of shared sub-objects.  The keys
      8 are ordinary strings.
      9 
     10 To summarize the interface (key is a string, data is an arbitrary
     11 object):
     12 
     13         import shelve
     14         d = shelve.open(filename) # open, with (g)dbm filename -- no suffix
     15 
     16         d[key] = data   # store data at key (overwrites old data if
     17                         # using an existing key)
     18         data = d[key]   # retrieve a COPY of the data at key (raise
     19                         # KeyError if no such key) -- NOTE that this
     20                         # access returns a *copy* of the entry!
     21         del d[key]      # delete data stored at key (raises KeyError
     22                         # if no such key)
     23         flag = d.has_key(key)   # true if the key exists; same as "key in d"
     24         list = d.keys() # a list of all existing keys (slow!)
     25 
     26         d.close()       # close it
     27 
     28 Dependent on the implementation, closing a persistent dictionary may
     29 or may not be necessary to flush changes to disk.
     30 
     31 Normally, d[key] returns a COPY of the entry.  This needs care when
     32 mutable entries are mutated: for example, if d[key] is a list,
     33         d[key].append(anitem)
     34 does NOT modify the entry d[key] itself, as stored in the persistent
     35 mapping -- it only modifies the copy, which is then immediately
     36 discarded, so that the append has NO effect whatsoever.  To append an
     37 item to d[key] in a way that will affect the persistent mapping, use:
     38         data = d[key]
     39         data.append(anitem)
     40         d[key] = data
     41 
     42 To avoid the problem with mutable entries, you may pass the keyword
     43 argument writeback=True in the call to shelve.open.  When you use:
     44         d = shelve.open(filename, writeback=True)
     45 then d keeps a cache of all entries you access, and writes them all back
     46 to the persistent mapping when you call d.close().  This ensures that
     47 such usage as d[key].append(anitem) works as intended.
     48 
     49 However, using the keyword argument writeback=True may consume vast
     50 amounts of memory for the cache, and it may make d.close() very slow, if you
     51 access many of d's entries after opening it in this way: d has no way to
     52 check which of the entries you access are mutable and/or which ones you
     53 actually mutate, so it must cache, and write back at close, all of the
     54 entries that you access.  You can call d.sync() to write back all the
     55 entries in the cache, and empty the cache (d.sync() also synchronizes
     56 the persistent dictionary on disk, if feasible).
     57 """
     58 
     59 # Try using cPickle and cStringIO if available.
     60 
     61 try:
     62     from cPickle import Pickler, Unpickler
     63 except ImportError:
     64     from pickle import Pickler, Unpickler
     65 
     66 try:
     67     from cStringIO import StringIO
     68 except ImportError:
     69     from StringIO import StringIO
     70 
     71 import UserDict
     72 
     73 __all__ = ["Shelf","BsdDbShelf","DbfilenameShelf","open"]
     74 
     75 class _ClosedDict(UserDict.DictMixin):
     76     'Marker for a closed dict.  Access attempts raise a ValueError.'
     77 
     78     def closed(self, *args):
     79         raise ValueError('invalid operation on closed shelf')
     80     __getitem__ = __setitem__ = __delitem__ = keys = closed
     81 
     82     def __repr__(self):
     83         return '<Closed Dictionary>'
     84 
     85 class Shelf(UserDict.DictMixin):
     86     """Base class for shelf implementations.
     87 
     88     This is initialized with a dictionary-like object.
     89     See the module's __doc__ string for an overview of the interface.
     90     """
     91 
     92     def __init__(self, dict, protocol=None, writeback=False):
     93         self.dict = dict
     94         if protocol is None:
     95             protocol = 0
     96         self._protocol = protocol
     97         self.writeback = writeback
     98         self.cache = {}
     99 
    100     def keys(self):
    101         return self.dict.keys()
    102 
    103     def __len__(self):
    104         return len(self.dict)
    105 
    106     def has_key(self, key):
    107         return key in self.dict
    108 
    109     def __contains__(self, key):
    110         return key in self.dict
    111 
    112     def get(self, key, default=None):
    113         if key in self.dict:
    114             return self[key]
    115         return default
    116 
    117     def __getitem__(self, key):
    118         try:
    119             value = self.cache[key]
    120         except KeyError:
    121             f = StringIO(self.dict[key])
    122             value = Unpickler(f).load()
    123             if self.writeback:
    124                 self.cache[key] = value
    125         return value
    126 
    127     def __setitem__(self, key, value):
    128         if self.writeback:
    129             self.cache[key] = value
    130         f = StringIO()
    131         p = Pickler(f, self._protocol)
    132         p.dump(value)
    133         self.dict[key] = f.getvalue()
    134 
    135     def __delitem__(self, key):
    136         del self.dict[key]
    137         try:
    138             del self.cache[key]
    139         except KeyError:
    140             pass
    141 
    142     def close(self):
    143         if self.dict is None:
    144             return
    145         try:
    146             self.sync()
    147             try:
    148                 self.dict.close()
    149             except AttributeError:
    150                 pass
    151         finally:
    152             # Catch errors that may happen when close is called from __del__
    153             # because CPython is in interpreter shutdown.
    154             try:
    155                 self.dict = _ClosedDict()
    156             except:
    157                 self.dict = None
    158 
    159     def __del__(self):
    160         if not hasattr(self, 'writeback'):
    161             # __init__ didn't succeed, so don't bother closing
    162             return
    163         self.close()
    164 
    165     def sync(self):
    166         if self.writeback and self.cache:
    167             self.writeback = False
    168             for key, entry in self.cache.iteritems():
    169                 self[key] = entry
    170             self.writeback = True
    171             self.cache = {}
    172         if hasattr(self.dict, 'sync'):
    173             self.dict.sync()
    174 
    175 
    176 class BsdDbShelf(Shelf):
    177     """Shelf implementation using the "BSD" db interface.
    178 
    179     This adds methods first(), next(), previous(), last() and
    180     set_location() that have no counterpart in [g]dbm databases.
    181 
    182     The actual database must be opened using one of the "bsddb"
    183     modules "open" routines (i.e. bsddb.hashopen, bsddb.btopen or
    184     bsddb.rnopen) and passed to the constructor.
    185 
    186     See the module's __doc__ string for an overview of the interface.
    187     """
    188 
    189     def __init__(self, dict, protocol=None, writeback=False):
    190         Shelf.__init__(self, dict, protocol, writeback)
    191 
    192     def set_location(self, key):
    193         (key, value) = self.dict.set_location(key)
    194         f = StringIO(value)
    195         return (key, Unpickler(f).load())
    196 
    197     def next(self):
    198         (key, value) = self.dict.next()
    199         f = StringIO(value)
    200         return (key, Unpickler(f).load())
    201 
    202     def previous(self):
    203         (key, value) = self.dict.previous()
    204         f = StringIO(value)
    205         return (key, Unpickler(f).load())
    206 
    207     def first(self):
    208         (key, value) = self.dict.first()
    209         f = StringIO(value)
    210         return (key, Unpickler(f).load())
    211 
    212     def last(self):
    213         (key, value) = self.dict.last()
    214         f = StringIO(value)
    215         return (key, Unpickler(f).load())
    216 
    217 
    218 class DbfilenameShelf(Shelf):
    219     """Shelf implementation using the "anydbm" generic dbm interface.
    220 
    221     This is initialized with the filename for the dbm database.
    222     See the module's __doc__ string for an overview of the interface.
    223     """
    224 
    225     def __init__(self, filename, flag='c', protocol=None, writeback=False):
    226         import anydbm
    227         Shelf.__init__(self, anydbm.open(filename, flag), protocol, writeback)
    228 
    229 
    230 def open(filename, flag='c', protocol=None, writeback=False):
    231     """Open a persistent dictionary for reading and writing.
    232 
    233     The filename parameter is the base filename for the underlying
    234     database.  As a side-effect, an extension may be added to the
    235     filename and more than one file may be created.  The optional flag
    236     parameter has the same interpretation as the flag parameter of
    237     anydbm.open(). The optional protocol parameter specifies the
    238     version of the pickle protocol (0, 1, or 2).
    239 
    240     See the module's __doc__ string for an overview of the interface.
    241     """
    242 
    243     return DbfilenameShelf(filename, flag, protocol, writeback)
    244