1 # Copyright (C) 2014 The Android Open Source Project 2 # 3 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # you may not use this file except in compliance with the License. 5 # You may obtain a copy of the License at 6 # 7 # http://www.apache.org/licenses/LICENSE-2.0 8 # 9 # Unless required by applicable law or agreed to in writing, software 10 # distributed under the License is distributed on an "AS IS" BASIS, 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # See the License for the specific language governing permissions and 13 # limitations under the License. 14 15 from __future__ import print_function 16 import heapq 17 import itertools 18 19 __all__ = ["RangeSet"] 20 21 class RangeSet(object): 22 """A RangeSet represents a set of nonoverlapping ranges on the 23 integers (ie, a set of integers, but efficient when the set contains 24 lots of runs.""" 25 26 def __init__(self, data=None): 27 self.monotonic = False 28 if isinstance(data, str): 29 self._parse_internal(data) 30 elif data: 31 assert len(data) % 2 == 0 32 self.data = tuple(self._remove_pairs(data)) 33 self.monotonic = all(x < y for x, y in zip(self.data, self.data[1:])) 34 else: 35 self.data = () 36 37 def __iter__(self): 38 for i in range(0, len(self.data), 2): 39 yield self.data[i:i+2] 40 41 def __eq__(self, other): 42 return self.data == other.data 43 44 def __ne__(self, other): 45 return self.data != other.data 46 47 def __nonzero__(self): 48 return bool(self.data) 49 50 def __str__(self): 51 if not self.data: 52 return "empty" 53 else: 54 return self.to_string() 55 56 def __repr__(self): 57 return '<RangeSet("' + self.to_string() + '")>' 58 59 @classmethod 60 def parse(cls, text): 61 """Parse a text string consisting of a space-separated list of 62 blocks and ranges, eg "10-20 30 35-40". Ranges are interpreted to 63 include both their ends (so the above example represents 18 64 individual blocks. Returns a RangeSet object. 65 66 If the input has all its blocks in increasing order, then returned 67 RangeSet will have an extra attribute 'monotonic' that is set to 68 True. For example the input "10-20 30" is monotonic, but the input 69 "15-20 30 10-14" is not, even though they represent the same set 70 of blocks (and the two RangeSets will compare equal with ==). 71 """ 72 return cls(text) 73 74 @classmethod 75 def parse_raw(cls, text): 76 """Parse a string generated by RangeSet.to_string_raw(). 77 78 >>> RangeSet.parse_raw(RangeSet("0-9").to_string_raw()) 79 <RangeSet("0-9")> 80 """ 81 82 raw = [int(i) for i in text.split(',')] 83 assert raw[0] == len(raw[1:]), "Invalid raw string." 84 85 return cls(data=raw[1:]) 86 87 def _parse_internal(self, text): 88 data = [] 89 last = -1 90 monotonic = True 91 for p in text.split(): 92 if "-" in p: 93 s, e = (int(x) for x in p.split("-")) 94 data.append(s) 95 data.append(e+1) 96 if last <= s <= e: 97 last = e 98 else: 99 monotonic = False 100 else: 101 s = int(p) 102 data.append(s) 103 data.append(s+1) 104 if last <= s: 105 last = s+1 106 else: 107 monotonic = False 108 data.sort() 109 self.data = tuple(self._remove_pairs(data)) 110 self.monotonic = monotonic 111 112 @staticmethod 113 def _remove_pairs(source): 114 """Remove consecutive duplicate items to simplify the result. 115 116 [1, 2, 2, 5, 5, 10] will become [1, 10].""" 117 last = None 118 for i in source: 119 if i == last: 120 last = None 121 else: 122 if last is not None: 123 yield last 124 last = i 125 if last is not None: 126 yield last 127 128 def to_string(self): 129 out = [] 130 for i in range(0, len(self.data), 2): 131 s, e = self.data[i:i+2] 132 if e == s+1: 133 out.append(str(s)) 134 else: 135 out.append(str(s) + "-" + str(e-1)) 136 return " ".join(out) 137 138 def to_string_raw(self): 139 assert self.data 140 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data) 141 142 def union(self, other): 143 """Return a new RangeSet representing the union of this RangeSet 144 with the argument. 145 146 >>> RangeSet("10-19 30-34").union(RangeSet("18-29")) 147 <RangeSet("10-34")> 148 >>> RangeSet("10-19 30-34").union(RangeSet("22 32")) 149 <RangeSet("10-19 22 30-34")> 150 """ 151 out = [] 152 z = 0 153 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 154 zip(other.data, itertools.cycle((+1, -1)))): 155 if (z == 0 and d == 1) or (z == 1 and d == -1): 156 out.append(p) 157 z += d 158 return RangeSet(data=out) 159 160 def intersect(self, other): 161 """Return a new RangeSet representing the intersection of this 162 RangeSet with the argument. 163 164 >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32")) 165 <RangeSet("18-19 30-32")> 166 >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28")) 167 <RangeSet("")> 168 """ 169 out = [] 170 z = 0 171 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 172 zip(other.data, itertools.cycle((+1, -1)))): 173 if (z == 1 and d == 1) or (z == 2 and d == -1): 174 out.append(p) 175 z += d 176 return RangeSet(data=out) 177 178 def subtract(self, other): 179 """Return a new RangeSet representing subtracting the argument 180 from this RangeSet. 181 182 >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32")) 183 <RangeSet("10-17 33-34")> 184 >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28")) 185 <RangeSet("10-19 30-34")> 186 """ 187 188 out = [] 189 z = 0 190 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 191 zip(other.data, itertools.cycle((-1, +1)))): 192 if (z == 0 and d == 1) or (z == 1 and d == -1): 193 out.append(p) 194 z += d 195 return RangeSet(data=out) 196 197 def overlaps(self, other): 198 """Returns true if the argument has a nonempty overlap with this 199 RangeSet. 200 201 >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32")) 202 True 203 >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28")) 204 False 205 """ 206 207 # This is like intersect, but we can stop as soon as we discover the 208 # output is going to be nonempty. 209 z = 0 210 for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 211 zip(other.data, itertools.cycle((+1, -1)))): 212 if (z == 1 and d == 1) or (z == 2 and d == -1): 213 return True 214 z += d 215 return False 216 217 def size(self): 218 """Returns the total size of the RangeSet (ie, how many integers 219 are in the set). 220 221 >>> RangeSet("10-19 30-34").size() 222 15 223 """ 224 225 total = 0 226 for i, p in enumerate(self.data): 227 if i % 2: 228 total += p 229 else: 230 total -= p 231 return total 232 233 def map_within(self, other): 234 """'other' should be a subset of 'self'. Returns a RangeSet 235 representing what 'other' would get translated to if the integers 236 of 'self' were translated down to be contiguous starting at zero. 237 238 >>> RangeSet("0-9").map_within(RangeSet("3-4")) 239 <RangeSet("3-4")> 240 >>> RangeSet("10-19").map_within(RangeSet("13-14")) 241 <RangeSet("3-4")> 242 >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32")) 243 <RangeSet("7-12")> 244 >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32")) 245 <RangeSet("2-3 7-12")> 246 """ 247 248 out = [] 249 offset = 0 250 start = None 251 for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))), 252 zip(other.data, itertools.cycle((-1, +1)))): 253 if d == -5: 254 start = p 255 elif d == +5: 256 offset += p-start 257 start = None 258 else: 259 out.append(offset + p - start) 260 return RangeSet(data=out) 261 262 def extend(self, n): 263 """Extend the RangeSet by 'n' blocks. 264 265 The lower bound is guaranteed to be non-negative. 266 267 >>> RangeSet("0-9").extend(1) 268 <RangeSet("0-10")> 269 >>> RangeSet("10-19").extend(15) 270 <RangeSet("0-34")> 271 >>> RangeSet("10-19 30-39").extend(4) 272 <RangeSet("6-23 26-43")> 273 >>> RangeSet("10-19 30-39").extend(10) 274 <RangeSet("0-49")> 275 """ 276 out = self 277 for i in range(0, len(self.data), 2): 278 s, e = self.data[i:i+2] 279 s1 = max(0, s - n) 280 e1 = e + n 281 out = out.union(RangeSet(str(s1) + "-" + str(e1-1))) 282 return out 283 284 def first(self, n): 285 """Return the RangeSet that contains at most the first 'n' integers. 286 287 >>> RangeSet("0-9").first(1) 288 <RangeSet("0")> 289 >>> RangeSet("10-19").first(5) 290 <RangeSet("10-14")> 291 >>> RangeSet("10-19").first(15) 292 <RangeSet("10-19")> 293 >>> RangeSet("10-19 30-39").first(3) 294 <RangeSet("10-12")> 295 >>> RangeSet("10-19 30-39").first(15) 296 <RangeSet("10-19 30-34")> 297 >>> RangeSet("10-19 30-39").first(30) 298 <RangeSet("10-19 30-39")> 299 >>> RangeSet("0-9").first(0) 300 <RangeSet("")> 301 """ 302 303 if self.size() <= n: 304 return self 305 306 out = [] 307 for s, e in self: 308 if e - s >= n: 309 out += (s, s+n) 310 break 311 else: 312 out += (s, e) 313 n -= e - s 314 return RangeSet(data=out) 315 316 def next_item(self): 317 """Return the next integer represented by the RangeSet. 318 319 >>> list(RangeSet("0-9").next_item()) 320 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 321 >>> list(RangeSet("10-19 3-5").next_item()) 322 [3, 4, 5, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] 323 >>> list(rangelib.RangeSet("10-19 3 5 7").next_item()) 324 [3, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] 325 """ 326 for s, e in self: 327 for element in range(s, e): 328 yield element 329 330 331 if __name__ == "__main__": 332 import doctest 333 doctest.testmod() 334