"""Generate a C header of Unicode general-category ranges for HarfBuzz.

Usage: <this script> <input .txt> <output .h>

The input is expected to be a DerivedGeneralCategory.txt file (see the URL
below); the output is a header defining the `category_properties` table.
"""

import sys
from unicode_parse_common import *

# http://www.unicode.org/Public/5.1.0/ucd/extracted/DerivedGeneralCategory.txt

# Maps the two-letter general-category abbreviations used in the Unicode data
# file to the identifiers emitted in the `category` field of the generated
# table (typed as HB_CharCategory in the output).
category_to_harfbuzz = {
    'Mn': 'HB_Mark_NonSpacing',
    'Mc': 'HB_Mark_SpacingCombining',
    'Me': 'HB_Mark_Enclosing',

    'Nd': 'HB_Number_DecimalDigit',
    'Nl': 'HB_Number_Letter',
    'No': 'HB_Number_Other',

    'Zs': 'HB_Separator_Space',
    'Zl': 'HB_Separator_Line',
    'Zp': 'HB_Separator_Paragraph',

    'Cc': 'HB_Other_Control',
    'Cf': 'HB_Other_Format',
    'Cs': 'HB_Other_Surrogate',
    'Co': 'HB_Other_PrivateUse',
    'Cn': 'HB_Other_NotAssigned',

    'Lu': 'HB_Letter_Uppercase',
    'Ll': 'HB_Letter_Lowercase',
    'Lt': 'HB_Letter_Titlecase',
    'Lm': 'HB_Letter_Modifier',
    'Lo': 'HB_Letter_Other',

    'Pc': 'HB_Punctuation_Connector',
    'Pd': 'HB_Punctuation_Dash',
    'Ps': 'HB_Punctuation_Open',
    'Pe': 'HB_Punctuation_Close',
    'Pi': 'HB_Punctuation_InitialQuote',
    'Pf': 'HB_Punctuation_FinalQuote',
    'Po': 'HB_Punctuation_Other',

    'Sm': 'HB_Symbol_Math',
    'Sc': 'HB_Symbol_Currency',
    'Sk': 'HB_Symbol_Modifier',
    'So': 'HB_Symbol_Other',
}


def main(infile, outfile):
    """Write the category-properties C header for `infile` to `outfile`.

    Args:
        infile: input handed unchanged to `unicode_file_parse` together with
            `category_to_harfbuzz` (presumably an open, readable
            DerivedGeneralCategory.txt -- confirm against
            unicode_parse_common).
        outfile: object with a `write(str)` method that receives the header.

    The result of `unicode_file_parse` is passed through `sort_and_merge`
    (both come from unicode_parse_common) and must then support `len()` and
    iterate as `(start, end, value)` triples, where `start` and `end` are
    integers (they are formatted as hex) and `value` is emitted verbatim.
    """
    ranges = unicode_file_parse(infile, category_to_harfbuzz)
    ranges = sort_and_merge(ranges)

    # Each entry is one output line; an entry ending in '\n' therefore yields
    # a blank line after it, exactly as the former `print >>outfile` calls did.
    # NOTE(review): the banner says "script tables" although this file emits
    # the category table; the text is kept as-is so the generated header does
    # not change.
    lines = [
        '// Generated from Unicode script tables\n',
        '#ifndef CATEGORY_PROPERTIES_H_',
        '#define CATEGORY_PROPERTIES_H_\n',
        '#include <stdint.h>',
        '#include "harfbuzz-external.h"\n',
        'struct category_property {',
        ' uint32_t range_start;',
        ' uint32_t range_end;',
        ' HB_CharCategory category;',
        '};\n',
        'static const struct category_property category_properties[] = {',
    ]
    for (start, end, value) in ranges:
        lines.append(' {0x%x, 0x%x, %s},' % (start, end, value))
    lines.extend([
        '};\n',
        'static const unsigned category_properties_count = %d;\n' % len(ranges),
        '#endif // CATEGORY_PROPERTIES_H_',
    ])

    # A single write keeps the output identical while working on both
    # Python 2 and Python 3 (no print-chevron syntax).
    outfile.write('\n'.join(lines) + '\n')


if __name__ == '__main__':
    if len(sys.argv) != 3:
        # Report misuse on stderr with a failing status so that a calling
        # build step does not mistake it for a successful generation.
        sys.stderr.write('Usage: %s <input .txt> <output .h>\n' % sys.argv[0])
        sys.exit(1)

    # Context managers guarantee both files are flushed and closed, even if
    # parsing raises.
    with open(sys.argv[1], 'r') as infile:
        with open(sys.argv[2], 'w') as outfile:
            main(infile, outfile)