import sys

# http://www.unicode.org/Public/UNIDATA/auxiliary/BidiMirroring.txt

# This parses a file in the format of the above file and outputs a table
# suitable for bsearch(3). This table maps Unicode code points to their
# 'mirror'. (Mirroring is used when rendering RTL characters, see the Unicode
# standard). By convention, this mapping should be symmetric (if a maps to b
# then b maps to a), but this code doesn't enforce or check this.
#
# The code deliberately sticks to constructs that run unchanged on both
# Python 2.6+ and Python 3 (no print statement, no file() builtin).


def _parse_pairs(infile):
    """Return the (code point, mirror) pairs listed in *infile*.

    infile is any iterable of text lines. Empty lines, lines starting with
    '#', and lines with no ';' before the first '#' are skipped. For every
    other line, the text before the first '#' is split on its first ';' and
    both halves are parsed as hexadecimal integers.

    Raises ValueError if either half of a data line is not valid hex.
    """
    pairs = []
    for line in infile:
        # Strip only the line terminator. Unconditionally dropping the last
        # character would eat a hex digit when the final line of the file
        # has no trailing newline.
        line = line.rstrip('\r\n')
        if not line or line[0] == '#':
            continue
        # Anything after the first '#' is a trailing comment.
        data = line.split('#', 1)[0]
        if ';' not in data:
            continue
        a, b = data.split(';', 1)
        # int() tolerates the whitespace surrounding each field.
        pairs.append((int(a, 16), int(b, 16)))
    return pairs


def main(infile, outfile):
    """Write a C header holding the sorted mirroring table to *outfile*.

    infile is an iterable of lines in the BidiMirroring.txt format; outfile
    is any object with a write() method accepting str. The pairs are sorted
    in ascending order so that the emitted array can be searched with
    bsearch(3). The header also defines mirroring_properties_count, the
    number of entries in the array.
    """
    pairs = sorted(_parse_pairs(infile))

    lines = [
        '// Generated from Unicode Bidi Mirroring tables\n',
        '#ifndef MIRRORING_PROPERTY_H_',
        '#define MIRRORING_PROPERTY_H_\n',
        '#include <stdint.h>',
        'struct mirroring_property {',
        ' uint32_t a;',
        ' uint32_t b;',
        '};\n',
        'static const struct mirroring_property mirroring_properties[] = {',
    ]
    lines.extend(' {0x%x, 0x%x},' % pair for pair in pairs)
    lines.extend([
        '};\n',
        'static const unsigned mirroring_properties_count = %d;\n'
        % len(pairs),
        '#endif // MIRRORING_PROPERTY_H_',
    ])

    # Every entry is terminated by a newline, matching one print per entry.
    outfile.write('\n'.join(lines) + '\n')


if __name__ == '__main__':
    if len(sys.argv) != 3:
        print('Usage: %s <input .txt> <output .h>' % sys.argv[0])
    else:
        # Context managers guarantee both files are flushed and closed,
        # even if parsing raises part-way through.
        with open(sys.argv[1], 'r') as infile:
            with open(sys.argv[2], 'w') as outfile:
                main(infile, outfile)