Home | History | Annotate | Download | only in util
      1 # -*- coding: iso-8859-15 -*-
      2 """Immutable integer set type.
      3 
      4 Integer set class.
      5 
      6 Copyright (C) 2006, Heiko Wundram.
      7 Released under the MIT license.
      8 """
      9 import six
     10 
     11 # Version information
     12 # -------------------
     13 
     14 __author__ = "Heiko Wundram <me (at] modelnine.org>"
     15 __version__ = "0.2"
     16 __revision__ = "6"
     17 __date__ = "2006-01-20"
     18 
     19 
     20 # Utility classes
     21 # ---------------
     22 
     23 class _Infinity(object):
     24     """Internal type used to represent infinity values."""
     25 
     26     __slots__ = ["_neg"]
     27 
     28     def __init__(self,neg):
     29         self._neg = neg
     30 
     31     def __lt__(self,value):
     32         if not isinstance(value, _VALID_TYPES):
     33             return NotImplemented
     34         return ( self._neg and
     35                  not ( isinstance(value,_Infinity) and value._neg ) )
     36 
     37     def __le__(self,value):
     38         if not isinstance(value, _VALID_TYPES):
     39             return NotImplemented
     40         return self._neg
     41 
     42     def __gt__(self,value):
     43         if not isinstance(value, _VALID_TYPES):
     44             return NotImplemented
     45         return not ( self._neg or
     46                      ( isinstance(value,_Infinity) and not value._neg ) )
     47 
     48     def __ge__(self,value):
     49         if not isinstance(value, _VALID_TYPES):
     50             return NotImplemented
     51         return not self._neg
     52 
     53     def __eq__(self,value):
     54         if not isinstance(value, _VALID_TYPES):
     55             return NotImplemented
     56         return isinstance(value,_Infinity) and self._neg == value._neg
     57 
     58     def __ne__(self,value):
     59         if not isinstance(value, _VALID_TYPES):
     60             return NotImplemented
     61         return not isinstance(value,_Infinity) or self._neg != value._neg
     62 
     63     def __repr__(self):
     64         return "None"
     65 
     66 _VALID_TYPES = six.integer_types + (_Infinity,)
     67 
     68 
     69 
     70 # Constants
     71 # ---------
     72 
     73 _MININF = _Infinity(True)
     74 _MAXINF = _Infinity(False)
     75 
     76 
     77 # Integer set class
     78 # -----------------
     79 
     80 class IntSet(object):
     81     """Integer set class with efficient storage in a RLE format of ranges.
     82     Supports minus and plus infinity in the range."""
     83 
     84     __slots__ = ["_ranges","_min","_max","_hash"]
     85 
     86     def __init__(self,*args,**kwargs):
     87         """Initialize an integer set. The constructor accepts an unlimited
     88         number of arguments that may either be tuples in the form of
     89         (start,stop) where either start or stop may be a number or None to
     90         represent maximum/minimum in that direction. The range specified by
     91         (start,stop) is always inclusive (differing from the builtin range
     92         operator).
     93 
     94         Keyword arguments that can be passed to an integer set are min and
     95         max, which specify the minimum and maximum number in the set,
     96         respectively. You can also pass None here to represent minus or plus
     97         infinity, which is also the default.
     98         """
     99 
    100         # Special case copy constructor.
    101         if len(args) == 1 and isinstance(args[0],IntSet):
    102             if kwargs:
    103                 raise ValueError("No keyword arguments for copy constructor.")
    104             self._min = args[0]._min
    105             self._max = args[0]._max
    106             self._ranges = args[0]._ranges
    107             self._hash = args[0]._hash
    108             return
    109 
    110         # Initialize set.
    111         self._ranges = []
    112 
    113         # Process keyword arguments.
    114         self._min = kwargs.pop("min",_MININF)
    115         self._max = kwargs.pop("max",_MAXINF)
    116         if self._min is None:
    117             self._min = _MININF
    118         if self._max is None:
    119             self._max = _MAXINF
    120 
    121         # Check keyword arguments.
    122         if kwargs:
    123             raise ValueError("Invalid keyword argument.")
    124         if not ( isinstance(self._min, six.integer_types) or self._min is _MININF ):
    125             raise TypeError("Invalid type of min argument.")
    126         if not ( isinstance(self._max, six.integer_types) or self._max is _MAXINF ):
    127             raise TypeError("Invalid type of max argument.")
    128         if ( self._min is not _MININF and self._max is not _MAXINF and
    129              self._min > self._max ):
    130             raise ValueError("Minimum is not smaller than maximum.")
    131         if isinstance(self._max, six.integer_types):
    132             self._max += 1
    133 
    134         # Process arguments.
    135         for arg in args:
    136             if isinstance(arg, six.integer_types):
    137                 start, stop = arg, arg+1
    138             elif isinstance(arg,tuple):
    139                 if len(arg) != 2:
    140                     raise ValueError("Invalid tuple, must be (start,stop).")
    141 
    142                 # Process argument.
    143                 start, stop = arg
    144                 if start is None:
    145                     start = self._min
    146                 if stop is None:
    147                     stop = self._max
    148 
    149                 # Check arguments.
    150                 if not ( isinstance(start, six.integer_types) or start is _MININF ):
    151                     raise TypeError("Invalid type of tuple start.")
    152                 if not ( isinstance(stop, six.integer_types) or stop is _MAXINF ):
    153                     raise TypeError("Invalid type of tuple stop.")
    154                 if ( start is not _MININF and stop is not _MAXINF and
    155                      start > stop ):
    156                     continue
    157                 if isinstance(stop, six.integer_types):
    158                     stop += 1
    159             else:
    160                 raise TypeError("Invalid argument.")
    161 
    162             if start > self._max:
    163                 continue
    164             elif start < self._min:
    165                 start = self._min
    166             if stop < self._min:
    167                 continue
    168             elif stop > self._max:
    169                 stop = self._max
    170             self._ranges.append((start,stop))
    171 
    172         # Normalize set.
    173         self._normalize()
    174 
    175     # Utility functions for set operations
    176     # ------------------------------------
    177 
    178     def _iterranges(self,r1,r2,minval=_MININF,maxval=_MAXINF):
    179         curval = minval
    180         curstates = {"r1":False,"r2":False}
    181         imax, jmax = 2*len(r1), 2*len(r2)
    182         i, j = 0, 0
    183         while i < imax or j < jmax:
    184             if i < imax and ( ( j < jmax and
    185                                 r1[i>>1][i&1] < r2[j>>1][j&1] ) or
    186                               j == jmax ):
    187                 cur_r, newname, newstate = r1[i>>1][i&1], "r1", not (i&1)
    188                 i += 1
    189             else:
    190                 cur_r, newname, newstate = r2[j>>1][j&1], "r2", not (j&1)
    191                 j += 1
    192             if curval < cur_r:
    193                 if cur_r > maxval:
    194                     break
    195                 yield curstates, (curval,cur_r)
    196                 curval = cur_r
    197             curstates[newname] = newstate
    198         if curval < maxval:
    199             yield curstates, (curval,maxval)
    200 
    201     def _normalize(self):
    202         self._ranges.sort()
    203         i = 1
    204         while i < len(self._ranges):
    205             if self._ranges[i][0] < self._ranges[i-1][1]:
    206                 self._ranges[i-1] = (self._ranges[i-1][0],
    207                                      max(self._ranges[i-1][1],
    208                                          self._ranges[i][1]))
    209                 del self._ranges[i]
    210             else:
    211                 i += 1
    212         self._ranges = tuple(self._ranges)
    213         self._hash = hash(self._ranges)
    214 
    215     def __coerce__(self,other):
    216         if isinstance(other,IntSet):
    217             return self, other
    218         elif isinstance(other, six.integer_types + (tuple,)):
    219             try:
    220                 return self, self.__class__(other)
    221             except TypeError:
    222                 # Catch a type error, in that case the structure specified by
    223                 # other is something we can't coerce, return NotImplemented.
    224                 # ValueErrors are not caught, they signal that the data was
    225                 # invalid for the constructor. This is appropriate to signal
    226                 # as a ValueError to the caller.
    227                 return NotImplemented
    228         elif isinstance(other,list):
    229             try:
    230                 return self, self.__class__(*other)
    231             except TypeError:
    232                 # See above.
    233                 return NotImplemented
    234         return NotImplemented
    235 
    236     # Set function definitions
    237     # ------------------------
    238 
    239     def _make_function(name,type,doc,pall,pany=None):
    240         """Makes a function to match two ranges. Accepts two types: either
    241         'set', which defines a function which returns a set with all ranges
    242         matching pall (pany is ignored), or 'bool', which returns True if pall
    243         matches for all ranges and pany matches for any one range. doc is the
    244         dostring to give this function. pany may be none to ignore the any
    245         match.
    246 
    247         The predicates get a dict with two keys, 'r1', 'r2', which denote
    248         whether the current range is present in range1 (self) and/or range2
    249         (other) or none of the two, respectively."""
    250 
    251         if type == "set":
    252             def f(self,other):
    253                 coerced = self.__coerce__(other)
    254                 if coerced is NotImplemented:
    255                     return NotImplemented
    256                 other = coerced[1]
    257                 newset = self.__class__.__new__(self.__class__)
    258                 newset._min = min(self._min,other._min)
    259                 newset._max = max(self._max,other._max)
    260                 newset._ranges = []
    261                 for states, (start,stop) in \
    262                         self._iterranges(self._ranges,other._ranges,
    263                                          newset._min,newset._max):
    264                     if pall(states):
    265                         if newset._ranges and newset._ranges[-1][1] == start:
    266                             newset._ranges[-1] = (newset._ranges[-1][0],stop)
    267                         else:
    268                             newset._ranges.append((start,stop))
    269                 newset._ranges = tuple(newset._ranges)
    270                 newset._hash = hash(self._ranges)
    271                 return newset
    272         elif type == "bool":
    273             def f(self,other):
    274                 coerced = self.__coerce__(other)
    275                 if coerced is NotImplemented:
    276                     return NotImplemented
    277                 other = coerced[1]
    278                 _min = min(self._min,other._min)
    279                 _max = max(self._max,other._max)
    280                 found = not pany
    281                 for states, (start,stop) in \
    282                         self._iterranges(self._ranges,other._ranges,_min,_max):
    283                     if not pall(states):
    284                         return False
    285                     found = found or pany(states)
    286                 return found
    287         else:
    288             raise ValueError("Invalid type of function to create.")
    289         try:
    290             f.func_name = name
    291         except TypeError:
    292             pass
    293         f.func_doc = doc
    294         return f
    295 
    296     # Intersection.
    297     __and__ = _make_function("__and__","set",
    298                              "Intersection of two sets as a new set.",
    299                              lambda s: s["r1"] and s["r2"])
    300     __rand__ = _make_function("__rand__","set",
    301                               "Intersection of two sets as a new set.",
    302                               lambda s: s["r1"] and s["r2"])
    303     intersection = _make_function("intersection","set",
    304                                   "Intersection of two sets as a new set.",
    305                                   lambda s: s["r1"] and s["r2"])
    306 
    307     # Union.
    308     __or__ = _make_function("__or__","set",
    309                             "Union of two sets as a new set.",
    310                             lambda s: s["r1"] or s["r2"])
    311     __ror__ = _make_function("__ror__","set",
    312                              "Union of two sets as a new set.",
    313                              lambda s: s["r1"] or s["r2"])
    314     union = _make_function("union","set",
    315                            "Union of two sets as a new set.",
    316                            lambda s: s["r1"] or s["r2"])
    317 
    318     # Difference.
    319     __sub__ = _make_function("__sub__","set",
    320                              "Difference of two sets as a new set.",
    321                              lambda s: s["r1"] and not s["r2"])
    322     __rsub__ = _make_function("__rsub__","set",
    323                               "Difference of two sets as a new set.",
    324                               lambda s: s["r2"] and not s["r1"])
    325     difference = _make_function("difference","set",
    326                                 "Difference of two sets as a new set.",
    327                                 lambda s: s["r1"] and not s["r2"])
    328 
    329     # Symmetric difference.
    330     __xor__ = _make_function("__xor__","set",
    331                              "Symmetric difference of two sets as a new set.",
    332                              lambda s: s["r1"] ^ s["r2"])
    333     __rxor__ = _make_function("__rxor__","set",
    334                               "Symmetric difference of two sets as a new set.",
    335                               lambda s: s["r1"] ^ s["r2"])
    336     symmetric_difference = _make_function("symmetric_difference","set",
    337                                           "Symmetric difference of two sets as a new set.",
    338                                           lambda s: s["r1"] ^ s["r2"])
    339 
    340     # Containership testing.
    341     __contains__ = _make_function("__contains__","bool",
    342                                   "Returns true if self is superset of other.",
    343                                   lambda s: s["r1"] or not s["r2"])
    344     issubset = _make_function("issubset","bool",
    345                               "Returns true if self is subset of other.",
    346                               lambda s: s["r2"] or not s["r1"])
    347     istruesubset = _make_function("istruesubset","bool",
    348                                   "Returns true if self is true subset of other.",
    349                                   lambda s: s["r2"] or not s["r1"],
    350                                   lambda s: s["r2"] and not s["r1"])
    351     issuperset = _make_function("issuperset","bool",
    352                                 "Returns true if self is superset of other.",
    353                                 lambda s: s["r1"] or not s["r2"])
    354     istruesuperset = _make_function("istruesuperset","bool",
    355                                     "Returns true if self is true superset of other.",
    356                                     lambda s: s["r1"] or not s["r2"],
    357                                     lambda s: s["r1"] and not s["r2"])
    358     overlaps = _make_function("overlaps","bool",
    359                               "Returns true if self overlaps with other.",
    360                               lambda s: True,
    361                               lambda s: s["r1"] and s["r2"])
    362 
    363     # Comparison.
    364     __eq__ = _make_function("__eq__","bool",
    365                             "Returns true if self is equal to other.",
    366                             lambda s: not ( s["r1"] ^ s["r2"] ))
    367     __ne__ = _make_function("__ne__","bool",
    368                             "Returns true if self is different to other.",
    369                             lambda s: True,
    370                             lambda s: s["r1"] ^ s["r2"])
    371 
    372     # Clean up namespace.
    373     del _make_function
    374 
    375     # Define other functions.
    376     def inverse(self):
    377         """Inverse of set as a new set."""
    378 
    379         newset = self.__class__.__new__(self.__class__)
    380         newset._min = self._min
    381         newset._max = self._max
    382         newset._ranges = []
    383         laststop = self._min
    384         for r in self._ranges:
    385             if laststop < r[0]:
    386                 newset._ranges.append((laststop,r[0]))
    387                 laststop = r[1]
    388         if laststop < self._max:
    389             newset._ranges.append((laststop,self._max))
    390         return newset
    391 
    392     __invert__ = inverse
    393 
    394     # Hashing
    395     # -------
    396 
    397     def __hash__(self):
    398         """Returns a hash value representing this integer set. As the set is
    399         always stored normalized, the hash value is guaranteed to match for
    400         matching ranges."""
    401 
    402         return self._hash
    403 
    404     # Iterating
    405     # ---------
    406 
    407     def __len__(self):
    408         """Get length of this integer set. In case the length is larger than
    409         2**31 (including infinitely sized integer sets), it raises an
    410         OverflowError. This is due to len() restricting the size to
    411         0 <= len < 2**31."""
    412 
    413         if not self._ranges:
    414             return 0
    415         if self._ranges[0][0] is _MININF or self._ranges[-1][1] is _MAXINF:
    416             raise OverflowError("Infinitely sized integer set.")
    417         rlen = 0
    418         for r in self._ranges:
    419             rlen += r[1]-r[0]
    420         if rlen >= 2**31:
    421             raise OverflowError("Integer set bigger than 2**31.")
    422         return rlen
    423 
    424     def len(self):
    425         """Returns the length of this integer set as an integer. In case the
    426         length is infinite, returns -1. This function exists because of a
    427         limitation of the builtin len() function which expects values in
    428         the range 0 <= len < 2**31. Use this function in case your integer
    429         set might be larger."""
    430 
    431         if not self._ranges:
    432             return 0
    433         if self._ranges[0][0] is _MININF or self._ranges[-1][1] is _MAXINF:
    434             return -1
    435         rlen = 0
    436         for r in self._ranges:
    437             rlen += r[1]-r[0]
    438         return rlen
    439 
    440     def __nonzero__(self):
    441         """Returns true if this integer set contains at least one item."""
    442 
    443         return bool(self._ranges)
    444 
    445     def __iter__(self):
    446         """Iterate over all values in this integer set. Iteration always starts
    447         by iterating from lowest to highest over the ranges that are bounded.
    448         After processing these, all ranges that are unbounded (maximum 2) are
    449         yielded intermixed."""
    450 
    451         ubranges = []
    452         for r in self._ranges:
    453             if r[0] is _MININF:
    454                 if r[1] is _MAXINF:
    455                     ubranges.extend(([0,1],[-1,-1]))
    456                 else:
    457                     ubranges.append([r[1]-1,-1])
    458             elif r[1] is _MAXINF:
    459                 ubranges.append([r[0],1])
    460             else:
    461                 for val in xrange(r[0],r[1]):
    462                     yield val
    463         if ubranges:
    464             while True:
    465                 for ubrange in ubranges:
    466                     yield ubrange[0]
    467                     ubrange[0] += ubrange[1]
    468 
    469     # Printing
    470     # --------
    471 
    472     def __repr__(self):
    473         """Return a representation of this integer set. The representation is
    474         executable to get an equal integer set."""
    475 
    476         rv = []
    477         for start, stop in self._ranges:
    478             if ( isinstance(start, six.integer_types) and isinstance(stop, six.integer_types)
    479                  and stop-start == 1 ):
    480                 rv.append("%r" % start)
    481             elif isinstance(stop, six.integer_types):
    482                 rv.append("(%r,%r)" % (start,stop-1))
    483             else:
    484                 rv.append("(%r,%r)" % (start,stop))
    485         if self._min is not _MININF:
    486             rv.append("min=%r" % self._min)
    487         if self._max is not _MAXINF:
    488             rv.append("max=%r" % self._max)
    489         return "%s(%s)" % (self.__class__.__name__,",".join(rv))
    490 
    491 if __name__ == "__main__":
    492     # Little test script demonstrating functionality.
    493     x = IntSet((10,20),30)
    494     y = IntSet((10,20))
    495     z = IntSet((10,20),30,(15,19),min=0,max=40)
    496     print(x)
    497     print(x&110)
    498     print(x|110)
    499     print(x^(15,25))
    500     print(x-12)
    501     print(12 in x)
    502     print(x.issubset(x))
    503     print(y.issubset(x))
    504     print(x.istruesubset(x))
    505     print(y.istruesubset(x))
    506     for val in x:
    507         print(val)
    508     print(x.inverse())
    509     print(x == z)
    510     print(x == y)
    511     print(x != y)
    512     print(hash(x))
    513     print(hash(z))
    514     print(len(x))
    515     print(x.len())
    516