Home | History | Annotate | Download | only in Lib
      1 """Manage shelves of pickled objects.
      2 
      3 A "shelf" is a persistent, dictionary-like object.  The difference
      4 with dbm databases is that the values (not the keys!) in a shelf can
      5 be essentially arbitrary Python objects -- anything that the "pickle"
      6 module can handle.  This includes most class instances, recursive data
      7 types, and objects containing lots of shared sub-objects.  The keys
      8 are ordinary strings.
      9 
     10 To summarize the interface (key is a string, data is an arbitrary
     11 object):
     12 
     13         import shelve
     14         d = shelve.open(filename) # open, with (g)dbm filename -- no suffix
     15 
     16         d[key] = data   # store data at key (overwrites old data if
     17                         # using an existing key)
     18         data = d[key]   # retrieve a COPY of the data at key (raise
     19                         # KeyError if no such key) -- NOTE that this
     20                         # access returns a *copy* of the entry!
     21         del d[key]      # delete data stored at key (raises KeyError
     22                         # if no such key)
     23         flag = key in d # true if the key exists
     24         list = d.keys() # a list of all existing keys (slow!)
     25 
     26         d.close()       # close it
     27 
Depending on the implementation, closing a persistent dictionary may
or may not be necessary to flush changes to disk.
     30 
     31 Normally, d[key] returns a COPY of the entry.  This needs care when
     32 mutable entries are mutated: for example, if d[key] is a list,
     33         d[key].append(anitem)
     34 does NOT modify the entry d[key] itself, as stored in the persistent
     35 mapping -- it only modifies the copy, which is then immediately
     36 discarded, so that the append has NO effect whatsoever.  To append an
     37 item to d[key] in a way that will affect the persistent mapping, use:
     38         data = d[key]
     39         data.append(anitem)
     40         d[key] = data
     41 
     42 To avoid the problem with mutable entries, you may pass the keyword
     43 argument writeback=True in the call to shelve.open.  When you use:
     44         d = shelve.open(filename, writeback=True)
     45 then d keeps a cache of all entries you access, and writes them all back
     46 to the persistent mapping when you call d.close().  This ensures that
     47 such usage as d[key].append(anitem) works as intended.
     48 
However, using the keyword argument writeback=True may consume vast
amounts of memory for the cache, and it may make d.close() very slow, if
you access many of d's entries after opening it in this way: d has no way
to check which of the entries you access are mutable and/or which ones you
actually mutate, so it must cache, and write back at close, all of the
entries that you access.  You can call d.sync() to write back all the
entries in the cache, and empty the cache (d.sync() also synchronizes
the persistent dictionary on disk, if feasible).
     57 """
     58 
import collections
import collections.abc
from io import BytesIO
from pickle import Pickler, Unpickler
     63 
     64 __all__ = ["Shelf", "BsdDbShelf", "DbfilenameShelf", "open"]
     65 
     66 class _ClosedDict(collections.MutableMapping):
     67     'Marker for a closed dict.  Access attempts raise a ValueError.'
     68 
     69     def closed(self, *args):
     70         raise ValueError('invalid operation on closed shelf')
     71     __iter__ = __len__ = __getitem__ = __setitem__ = __delitem__ = keys = closed
     72 
     73     def __repr__(self):
     74         return '<Closed Dictionary>'
     75 
     76 
     77 class Shelf(collections.MutableMapping):
     78     """Base class for shelf implementations.
     79 
     80     This is initialized with a dictionary-like object.
     81     See the module's __doc__ string for an overview of the interface.
     82     """
     83 
     84     def __init__(self, dict, protocol=None, writeback=False,
     85                  keyencoding="utf-8"):
     86         self.dict = dict
     87         if protocol is None:
     88             protocol = 3
     89         self._protocol = protocol
     90         self.writeback = writeback
     91         self.cache = {}
     92         self.keyencoding = keyencoding
     93 
     94     def __iter__(self):
     95         for k in self.dict.keys():
     96             yield k.decode(self.keyencoding)
     97 
     98     def __len__(self):
     99         return len(self.dict)
    100 
    101     def __contains__(self, key):
    102         return key.encode(self.keyencoding) in self.dict
    103 
    104     def get(self, key, default=None):
    105         if key.encode(self.keyencoding) in self.dict:
    106             return self[key]
    107         return default
    108 
    109     def __getitem__(self, key):
    110         try:
    111             value = self.cache[key]
    112         except KeyError:
    113             f = BytesIO(self.dict[key.encode(self.keyencoding)])
    114             value = Unpickler(f).load()
    115             if self.writeback:
    116                 self.cache[key] = value
    117         return value
    118 
    119     def __setitem__(self, key, value):
    120         if self.writeback:
    121             self.cache[key] = value
    122         f = BytesIO()
    123         p = Pickler(f, self._protocol)
    124         p.dump(value)
    125         self.dict[key.encode(self.keyencoding)] = f.getvalue()
    126 
    127     def __delitem__(self, key):
    128         del self.dict[key.encode(self.keyencoding)]
    129         try:
    130             del self.cache[key]
    131         except KeyError:
    132             pass
    133 
    134     def __enter__(self):
    135         return self
    136 
    137     def __exit__(self, type, value, traceback):
    138         self.close()
    139 
    140     def close(self):
    141         if self.dict is None:
    142             return
    143         try:
    144             self.sync()
    145             try:
    146                 self.dict.close()
    147             except AttributeError:
    148                 pass
    149         finally:
    150             # Catch errors that may happen when close is called from __del__
    151             # because CPython is in interpreter shutdown.
    152             try:
    153                 self.dict = _ClosedDict()
    154             except:
    155                 self.dict = None
    156 
    157     def __del__(self):
    158         if not hasattr(self, 'writeback'):
    159             # __init__ didn't succeed, so don't bother closing
    160             # see http://bugs.python.org/issue1339007 for details
    161             return
    162         self.close()
    163 
    164     def sync(self):
    165         if self.writeback and self.cache:
    166             self.writeback = False
    167             for key, entry in self.cache.items():
    168                 self[key] = entry
    169             self.writeback = True
    170             self.cache = {}
    171         if hasattr(self.dict, 'sync'):
    172             self.dict.sync()
    173 
    174 
    175 class BsdDbShelf(Shelf):
    176     """Shelf implementation using the "BSD" db interface.
    177 
    178     This adds methods first(), next(), previous(), last() and
    179     set_location() that have no counterpart in [g]dbm databases.
    180 
    181     The actual database must be opened using one of the "bsddb"
    182     modules "open" routines (i.e. bsddb.hashopen, bsddb.btopen or
    183     bsddb.rnopen) and passed to the constructor.
    184 
    185     See the module's __doc__ string for an overview of the interface.
    186     """
    187 
    188     def __init__(self, dict, protocol=None, writeback=False,
    189                  keyencoding="utf-8"):
    190         Shelf.__init__(self, dict, protocol, writeback, keyencoding)
    191 
    192     def set_location(self, key):
    193         (key, value) = self.dict.set_location(key)
    194         f = BytesIO(value)
    195         return (key.decode(self.keyencoding), Unpickler(f).load())
    196 
    197     def next(self):
    198         (key, value) = next(self.dict)
    199         f = BytesIO(value)
    200         return (key.decode(self.keyencoding), Unpickler(f).load())
    201 
    202     def previous(self):
    203         (key, value) = self.dict.previous()
    204         f = BytesIO(value)
    205         return (key.decode(self.keyencoding), Unpickler(f).load())
    206 
    207     def first(self):
    208         (key, value) = self.dict.first()
    209         f = BytesIO(value)
    210         return (key.decode(self.keyencoding), Unpickler(f).load())
    211 
    212     def last(self):
    213         (key, value) = self.dict.last()
    214         f = BytesIO(value)
    215         return (key.decode(self.keyencoding), Unpickler(f).load())
    216 
    217 
    218 class DbfilenameShelf(Shelf):
    219     """Shelf implementation using the "dbm" generic dbm interface.
    220 
    221     This is initialized with the filename for the dbm database.
    222     See the module's __doc__ string for an overview of the interface.
    223     """
    224 
    225     def __init__(self, filename, flag='c', protocol=None, writeback=False):
    226         import dbm
    227         Shelf.__init__(self, dbm.open(filename, flag), protocol, writeback)
    228 
    229 
    230 def open(filename, flag='c', protocol=None, writeback=False):
    231     """Open a persistent dictionary for reading and writing.
    232 
    233     The filename parameter is the base filename for the underlying
    234     database.  As a side-effect, an extension may be added to the
    235     filename and more than one file may be created.  The optional flag
    236     parameter has the same interpretation as the flag parameter of
    237     dbm.open(). The optional protocol parameter specifies the
    238     version of the pickle protocol (0, 1, or 2).
    239 
    240     See the module's __doc__ string for an overview of the interface.
    241     """
    242 
    243     return DbfilenameShelf(filename, flag, protocol, writeback)
    244