Home | History | Annotate | Download | only in Lib
      1 """Manage shelves of pickled objects.
      2 
      3 A "shelf" is a persistent, dictionary-like object.  The difference
      4 with dbm databases is that the values (not the keys!) in a shelf can
      5 be essentially arbitrary Python objects -- anything that the "pickle"
      6 module can handle.  This includes most class instances, recursive data
      7 types, and objects containing lots of shared sub-objects.  The keys
      8 are ordinary strings.
      9 
     10 To summarize the interface (key is a string, data is an arbitrary
     11 object):
     12 
     13         import shelve
     14         d = shelve.open(filename) # open, with (g)dbm filename -- no suffix
     15 
     16         d[key] = data   # store data at key (overwrites old data if
     17                         # using an existing key)
     18         data = d[key]   # retrieve a COPY of the data at key (raise
     19                         # KeyError if no such key) -- NOTE that this
     20                         # access returns a *copy* of the entry!
     21         del d[key]      # delete data stored at key (raises KeyError
     22                         # if no such key)
     23         flag = d.has_key(key)   # true if the key exists; same as "key in d"
     24         list = d.keys() # a list of all existing keys (slow!)
     25 
     26         d.close()       # close it
     27 
     28 Dependent on the implementation, closing a persistent dictionary may
     29 or may not be necessary to flush changes to disk.
     30 
     31 Normally, d[key] returns a COPY of the entry.  This needs care when
     32 mutable entries are mutated: for example, if d[key] is a list,
     33         d[key].append(anitem)
     34 does NOT modify the entry d[key] itself, as stored in the persistent
     35 mapping -- it only modifies the copy, which is then immediately
     36 discarded, so that the append has NO effect whatsoever.  To append an
     37 item to d[key] in a way that will affect the persistent mapping, use:
     38         data = d[key]
     39         data.append(anitem)
     40         d[key] = data
     41 
     42 To avoid the problem with mutable entries, you may pass the keyword
     43 argument writeback=True in the call to shelve.open.  When you use:
     44         d = shelve.open(filename, writeback=True)
     45 then d keeps a cache of all entries you access, and writes them all back
     46 to the persistent mapping when you call d.close().  This ensures that
     47 such usage as d[key].append(anitem) works as intended.
     48 
     49 However, using keyword argument writeback=True may consume vast amounts
     50 of memory for the cache, and it may make d.close() very slow, if you
     51 access many of d's entries after opening it in this way: d has no way to
     52 check which of the entries you access are mutable and/or which ones you
     53 actually mutate, so it must cache, and write back at close, all of the
     54 entries that you access.  You can call d.sync() to write back all the
     55 entries in the cache, and empty the cache (d.sync() also synchronizes
     56 the persistent dictionary on disk, if feasible).
     57 """
     58 
     59 # Try using cPickle and cStringIO if available.

     60 
     61 try:
     62     from cPickle import Pickler, Unpickler
     63 except ImportError:
     64     from pickle import Pickler, Unpickler
     65 
     66 try:
     67     from cStringIO import StringIO
     68 except ImportError:
     69     from StringIO import StringIO
     70 
     71 import UserDict
     72 
     73 __all__ = ["Shelf","BsdDbShelf","DbfilenameShelf","open"]
     74 
     75 class _ClosedDict(UserDict.DictMixin):
     76     'Marker for a closed dict.  Access attempts raise a ValueError.'
     77 
     78     def closed(self, *args):
     79         raise ValueError('invalid operation on closed shelf')
     80     __getitem__ = __setitem__ = __delitem__ = keys = closed
     81 
     82     def __repr__(self):
     83         return '<Closed Dictionary>'
     84 
     85 class Shelf(UserDict.DictMixin):
     86     """Base class for shelf implementations.
     87 
     88     This is initialized with a dictionary-like object.
     89     See the module's __doc__ string for an overview of the interface.
     90     """
     91 
     92     def __init__(self, dict, protocol=None, writeback=False):
     93         self.dict = dict
     94         if protocol is None:
     95             protocol = 0
     96         self._protocol = protocol
     97         self.writeback = writeback
     98         self.cache = {}
     99 
    100     def keys(self):
    101         return self.dict.keys()
    102 
    103     def __len__(self):
    104         return len(self.dict)
    105 
    106     def has_key(self, key):
    107         return key in self.dict
    108 
    109     def __contains__(self, key):
    110         return key in self.dict
    111 
    112     def get(self, key, default=None):
    113         if key in self.dict:
    114             return self[key]
    115         return default
    116 
    117     def __getitem__(self, key):
    118         try:
    119             value = self.cache[key]
    120         except KeyError:
    121             f = StringIO(self.dict[key])
    122             value = Unpickler(f).load()
    123             if self.writeback:
    124                 self.cache[key] = value
    125         return value
    126 
    127     def __setitem__(self, key, value):
    128         if self.writeback:
    129             self.cache[key] = value
    130         f = StringIO()
    131         p = Pickler(f, self._protocol)
    132         p.dump(value)
    133         self.dict[key] = f.getvalue()
    134 
    135     def __delitem__(self, key):
    136         del self.dict[key]
    137         try:
    138             del self.cache[key]
    139         except KeyError:
    140             pass
    141 
    142     def close(self):
    143         self.sync()
    144         try:
    145             self.dict.close()
    146         except AttributeError:
    147             pass
    148         # Catch errors that may happen when close is called from __del__

    149         # because CPython is in interpreter shutdown.

    150         try:
    151             self.dict = _ClosedDict()
    152         except (NameError, TypeError):
    153             self.dict = None
    154 
    155     def __del__(self):
    156         if not hasattr(self, 'writeback'):
    157             # __init__ didn't succeed, so don't bother closing

    158             return
    159         self.close()
    160 
    161     def sync(self):
    162         if self.writeback and self.cache:
    163             self.writeback = False
    164             for key, entry in self.cache.iteritems():
    165                 self[key] = entry
    166             self.writeback = True
    167             self.cache = {}
    168         if hasattr(self.dict, 'sync'):
    169             self.dict.sync()
    170 
    171 
    172 class BsdDbShelf(Shelf):
    173     """Shelf implementation using the "BSD" db interface.
    174 
    175     This adds methods first(), next(), previous(), last() and
    176     set_location() that have no counterpart in [g]dbm databases.
    177 
    178     The actual database must be opened using one of the "bsddb"
    179     modules "open" routines (i.e. bsddb.hashopen, bsddb.btopen or
    180     bsddb.rnopen) and passed to the constructor.
    181 
    182     See the module's __doc__ string for an overview of the interface.
    183     """
    184 
    185     def __init__(self, dict, protocol=None, writeback=False):
    186         Shelf.__init__(self, dict, protocol, writeback)
    187 
    188     def set_location(self, key):
    189         (key, value) = self.dict.set_location(key)
    190         f = StringIO(value)
    191         return (key, Unpickler(f).load())
    192 
    193     def next(self):
    194         (key, value) = self.dict.next()
    195         f = StringIO(value)
    196         return (key, Unpickler(f).load())
    197 
    198     def previous(self):
    199         (key, value) = self.dict.previous()
    200         f = StringIO(value)
    201         return (key, Unpickler(f).load())
    202 
    203     def first(self):
    204         (key, value) = self.dict.first()
    205         f = StringIO(value)
    206         return (key, Unpickler(f).load())
    207 
    208     def last(self):
    209         (key, value) = self.dict.last()
    210         f = StringIO(value)
    211         return (key, Unpickler(f).load())
    212 
    213 
    214 class DbfilenameShelf(Shelf):
    215     """Shelf implementation using the "anydbm" generic dbm interface.
    216 
    217     This is initialized with the filename for the dbm database.
    218     See the module's __doc__ string for an overview of the interface.
    219     """
    220 
    221     def __init__(self, filename, flag='c', protocol=None, writeback=False):
    222         import anydbm
    223         Shelf.__init__(self, anydbm.open(filename, flag), protocol, writeback)
    224 
    225 
    226 def open(filename, flag='c', protocol=None, writeback=False):
    227     """Open a persistent dictionary for reading and writing.
    228 
    229     The filename parameter is the base filename for the underlying
    230     database.  As a side-effect, an extension may be added to the
    231     filename and more than one file may be created.  The optional flag
    232     parameter has the same interpretation as the flag parameter of
    233     anydbm.open(). The optional protocol parameter specifies the
    234     version of the pickle protocol (0, 1, or 2).
    235 
    236     See the module's __doc__ string for an overview of the interface.
    237     """
    238 
    239     return DbfilenameShelf(filename, flag, protocol, writeback)
    240