Home | History | Annotate | Download | only in tables
      1 def lines_get(f):
      2   '''Parse a file like object, removing comments and returning a list of
      3      lines.'''
      4   def cut_comment(line):
      5     first_hash = line.find('#')
      6     if first_hash == -1:
      7       return line
      8     return line[:first_hash]
      9 
     10   return [x for x in [cut_comment(x[:-1]) for x in f.readlines()] if len(x)]
     11 
     12 def line_split(line):
     13   '''Split a line based on a semicolon separator.'''
     14   def normalise(word):
     15     return word.lstrip().rstrip()
     16   return [normalise(x) for x in line.split(';')]
     17 
     18 def codepoints_parse(token):
     19   '''Parse a Unicode style code-point range. Return either a single value or a
     20      tuple of (start, end) for a range of code-points.'''
     21   def fromHex(token):
     22     return int(token, 16)
     23   parts = token.split('..')
     24   if len(parts) == 2:
     25     return (fromHex(parts[0]), fromHex(parts[1]))
     26   elif len(parts) == 1:
     27     return fromHex(parts[0])
     28   else:
     29     raise ValueError(token)
     30 
     31 def unicode_file_parse(input, map, default_value = None):
     32   '''Parse a file like object, @input where the first column is a code-point
     33      range and the second column is mapped via the given dict, @map.'''
     34   ranges = []
     35   tokens = [line_split(x) for x in lines_get(input)]
     36   for line in tokens:
     37     if len(line) == 2:
     38       codepoints = codepoints_parse(line[0])
     39       value = map[line[1]]
     40       if value == default_value:
     41         continue
     42 
     43       if type(codepoints) == int:
     44         codepoints = (codepoints, codepoints)
     45 
     46       ranges.append((codepoints[0], codepoints[1], value))
     47     else:
     48       raise ValueError(line)
     49 
     50   return ranges
     51 
     52 def sort_and_merge(ranges):
     53   '''Given a list of (start, end, value), merge elements where the ranges are
     54      continuous and the values are the same.'''
     55   output = []
     56   ranges.sort()
     57   current = None
     58   for v in ranges:
     59     if current is None:
     60       current = v
     61       continue
     62     if current[1] + 1 == v[0] and current[2] == v[2]:
     63       current = (current[0], v[1], v[2])
     64     else:
     65       output.append(current)
     66       current = v
     67   if current is not None:
     68     output.append(current)
     69 
     70   return output
     71