Home | History | Annotate | Download | only in Lib
      1 """A dumb and slow but simple dbm clone.
      2 
      3 For database spam, spam.dir contains the index (a text file),
      4 spam.bak *may* contain a backup of the index (also a text file),
      5 while spam.dat contains the data (a binary file).
      6 
      7 XXX TO DO:
      8 
      9 - seems to contain a bug when updating...
     10 
     11 - reclaim free space (currently, space once occupied by deleted or expanded
     12 items is never reused)
     13 
     14 - support concurrent access (currently, if two processes take turns making
     15 updates, they can mess up the index)
     16 
     17 - support efficient access to large databases (currently, the whole index
     18 is read when the database is opened, and some updates rewrite the whole index)
     19 
- support opening for read-only (flag = 'r'); the flag is recorded, but
writes are not rejected yet
     21 
     22 """
     23 
     24 import ast as _ast
     25 import os as _os
     26 import __builtin__
     27 import UserDict
     28 
# Keep a private reference to the builtin open(): this module defines its
# own open() function below, which shadows the builtin at module level.
_open = __builtin__.open

# Granularity of the data file: _Database starts every value at an offset
# that is a multiple of this many bytes.
_BLOCKSIZE = 512

error = IOError                         # For anydbm
     34 
     35 class _Database(UserDict.DictMixin):
     36 
     37     # The on-disk directory and data files can remain in mutually
     38     # inconsistent states for an arbitrarily long time (see comments
     39     # at the end of __setitem__).  This is only repaired when _commit()
     40     # gets called.  One place _commit() gets called is from __del__(),
     41     # and if that occurs at program shutdown time, module globals may
     42     # already have gotten rebound to None.  Since it's crucial that
     43     # _commit() finish successfully, we can't ignore shutdown races
     44     # here, and _commit() must not reference any globals.
     45     _os = _os       # for _commit()
     46     _open = _open   # for _commit()
     47 
     48     def __init__(self, filebasename, mode, flag='c'):
     49         self._mode = mode
     50         self._readonly = (flag == 'r')
     51 
     52         # The directory file is a text file.  Each line looks like
     53         #    "%r, (%d, %d)\n" % (key, pos, siz)
     54         # where key is the string key, pos is the offset into the dat
     55         # file of the associated value's first byte, and siz is the number
     56         # of bytes in the associated value.
     57         self._dirfile = filebasename + _os.extsep + 'dir'
     58 
     59         # The data file is a binary file pointed into by the directory
     60         # file, and holds the values associated with keys.  Each value
     61         # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
     62         # binary 8-bit string value.
     63         self._datfile = filebasename + _os.extsep + 'dat'
     64         self._bakfile = filebasename + _os.extsep + 'bak'
     65 
     66         # The index is an in-memory dict, mirroring the directory file.
     67         self._index = None  # maps keys to (pos, siz) pairs
     68 
     69         # Mod by Jack: create data file if needed
     70         try:
     71             f = _open(self._datfile, 'r')
     72         except IOError:
     73             with _open(self._datfile, 'w') as f:
     74                 self._chmod(self._datfile)
     75         else:
     76             f.close()
     77         self._update()
     78 
     79     # Read directory file into the in-memory index dict.
     80     def _update(self):
     81         self._index = {}
     82         try:
     83             f = _open(self._dirfile)
     84         except IOError:
     85             self._modified = not self._readonly
     86         else:
     87             self._modified = False
     88             with f:
     89                 for line in f:
     90                     line = line.rstrip()
     91                     key, pos_and_siz_pair = _ast.literal_eval(line)
     92                     self._index[key] = pos_and_siz_pair
     93 
     94     # Write the index dict to the directory file.  The original directory
     95     # file (if any) is renamed with a .bak extension first.  If a .bak
     96     # file currently exists, it's deleted.
     97     def _commit(self):
     98         # CAUTION:  It's vital that _commit() succeed, and _commit() can
     99         # be called from __del__().  Therefore we must never reference a
    100         # global in this routine.
    101         if self._index is None or not self._modified:
    102             return  # nothing to do
    103 
    104         try:
    105             self._os.unlink(self._bakfile)
    106         except self._os.error:
    107             pass
    108 
    109         try:
    110             self._os.rename(self._dirfile, self._bakfile)
    111         except self._os.error:
    112             pass
    113 
    114         with self._open(self._dirfile, 'w') as f:
    115             self._chmod(self._dirfile)
    116             for key, pos_and_siz_pair in self._index.iteritems():
    117                 f.write("%r, %r\n" % (key, pos_and_siz_pair))
    118 
    119     sync = _commit
    120 
    121     def __getitem__(self, key):
    122         pos, siz = self._index[key]     # may raise KeyError
    123         with _open(self._datfile, 'rb') as f:
    124             f.seek(pos)
    125             dat = f.read(siz)
    126         return dat
    127 
    128     # Append val to the data file, starting at a _BLOCKSIZE-aligned
    129     # offset.  The data file is first padded with NUL bytes (if needed)
    130     # to get to an aligned offset.  Return pair
    131     #     (starting offset of val, len(val))
    132     def _addval(self, val):
    133         with _open(self._datfile, 'rb+') as f:
    134             f.seek(0, 2)
    135             pos = int(f.tell())
    136             npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
    137             f.write('\0'*(npos-pos))
    138             pos = npos
    139             f.write(val)
    140         return (pos, len(val))
    141 
    142     # Write val to the data file, starting at offset pos.  The caller
    143     # is responsible for ensuring that there's enough room starting at
    144     # pos to hold val, without overwriting some other value.  Return
    145     # pair (pos, len(val)).
    146     def _setval(self, pos, val):
    147         with _open(self._datfile, 'rb+') as f:
    148             f.seek(pos)
    149             f.write(val)
    150         return (pos, len(val))
    151 
    152     # key is a new key whose associated value starts in the data file
    153     # at offset pos and with length siz.  Add an index record to
    154     # the in-memory index dict, and append one to the directory file.
    155     def _addkey(self, key, pos_and_siz_pair):
    156         self._index[key] = pos_and_siz_pair
    157         with _open(self._dirfile, 'a') as f:
    158             self._chmod(self._dirfile)
    159             f.write("%r, %r\n" % (key, pos_and_siz_pair))
    160 
    161     def __setitem__(self, key, val):
    162         if not type(key) == type('') == type(val):
    163             raise TypeError, "keys and values must be strings"
    164         self._modified = True
    165         if key not in self._index:
    166             self._addkey(key, self._addval(val))
    167         else:
    168             # See whether the new value is small enough to fit in the
    169             # (padded) space currently occupied by the old value.
    170             pos, siz = self._index[key]
    171             oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
    172             newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
    173             if newblocks <= oldblocks:
    174                 self._index[key] = self._setval(pos, val)
    175             else:
    176                 # The new value doesn't fit in the (padded) space used
    177                 # by the old value.  The blocks used by the old value are
    178                 # forever lost.
    179                 self._index[key] = self._addval(val)
    180 
    181             # Note that _index may be out of synch with the directory
    182             # file now:  _setval() and _addval() don't update the directory
    183             # file.  This also means that the on-disk directory and data
    184             # files are in a mutually inconsistent state, and they'll
    185             # remain that way until _commit() is called.  Note that this
    186             # is a disaster (for the database) if the program crashes
    187             # (so that _commit() never gets called).
    188 
    189     def __delitem__(self, key):
    190         self._modified = True
    191         # The blocks used by the associated value are lost.
    192         del self._index[key]
    193         # XXX It's unclear why we do a _commit() here (the code always
    194         # XXX has, so I'm not changing it).  _setitem__ doesn't try to
    195         # XXX keep the directory file in synch.  Why should we?  Or
    196         # XXX why shouldn't __setitem__?
    197         self._commit()
    198 
    199     def keys(self):
    200         return self._index.keys()
    201 
    202     def has_key(self, key):
    203         return key in self._index
    204 
    205     def __contains__(self, key):
    206         return key in self._index
    207 
    208     def iterkeys(self):
    209         return self._index.iterkeys()
    210     __iter__ = iterkeys
    211 
    212     def __len__(self):
    213         return len(self._index)
    214 
    215     def close(self):
    216         try:
    217             self._commit()
    218         finally:
    219             self._index = self._datfile = self._dirfile = self._bakfile = None
    220 
    221     __del__ = close
    222 
    223     def _chmod (self, file):
    224         if hasattr(self._os, 'chmod'):
    225             self._os.chmod(file, self._mode)
    226 
    227 
    228 def open(file, flag=None, mode=0666):
    229     """Open the database file, filename, and return corresponding object.
    230 
    231     The flag argument, used to control how the database is opened in the
    232     other DBM implementations, is ignored in the dumbdbm module; the
    233     database is always opened for update, and will be created if it does
    234     not exist.
    235 
    236     The optional mode argument is the UNIX mode of the file, used only when
    237     the database has to be created.  It defaults to octal code 0666 (and
    238     will be modified by the prevailing umask).
    239 
    240     """
    241     # flag argument is currently ignored
    242 
    243     # Modify mode depending on the umask
    244     try:
    245         um = _os.umask(0)
    246         _os.umask(um)
    247     except AttributeError:
    248         pass
    249     else:
    250         # Turn off any bits that are set in the umask
    251         mode = mode & (~um)
    252 
    253     return _Database(file, mode, flag)
    254