# Copyright (C) 2010 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Generate C++ source for the predefined regex character classes.

For every entry in ``types`` the script emits a ``CharacterClass* <name>Create()``
factory function, and (unless disabled) a 65536-entry ``_<name>Data`` lookup
table for each class that uses a table and is not the inverse of another class.

Usage::

    python <script> [--no-tables] [output-file]

``--no-tables`` is only recognised as the first argument.  The last remaining
argument, if any, names the output file; without one the generated text is
printed to standard output.
"""

import sys

# Character class descriptions.
#   "UseTable": whether the factory function should reference a lookup table.
#   "Inverse":  name of the class whose table is reused in inverted form.
#   "data":     members, each a single character, a single code point, or an
#               inclusive (first, last) pair of characters / code points.
types = {
    "wordchar": {
        "UseTable": True,
        "data": ['_', ('0', '9'), ('A', 'Z'), ('a', 'z')],
    },
    "nonwordchar": {
        "UseTable": True,
        "Inverse": "wordchar",
        "data": [
            '`',
            (0, ord('0') - 1),
            (ord('9') + 1, ord('A') - 1),
            (ord('Z') + 1, ord('_') - 1),
            (ord('z') + 1, 0xffff),
        ],
    },
    "newline": {
        "UseTable": False,
        "data": ['\n', '\r', 0x2028, 0x2029],
    },
    "spaces": {
        "UseTable": True,
        "data": [
            ' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f,
            0x205f, 0x3000, (0x2000, 0x200a), 0xfeff,
        ],
    },
    "nonspaces": {
        "UseTable": True,
        "Inverse": "spaces",
        "data": [
            (0, ord('\t') - 1),
            (ord('\r') + 1, ord(' ') - 1),
            (ord(' ') + 1, 0x009f),
            (0x00a1, 0x167f),
            (0x1681, 0x180d),
            (0x180f, 0x1fff),
            (0x200b, 0x2027),
            (0x202a, 0x202e),
            (0x2030, 0x205e),
            (0x2060, 0x2fff),
            (0x3001, 0xfefe),
            (0xff00, 0xffff),
        ],
    },
    "digits": {
        "UseTable": False,
        "data": [('0', '9')],
    },
    "nondigits": {
        "UseTable": False,
        "Inverse": "digits",
        "data": [(0, ord('0') - 1), (ord('9') + 1, 0xffff)],
    },
}
entriesPerLine = 50

TABLE_SIZE = 65536
ASCII_MAX = 0x7f
NO_TABLES_FLAG = "--no-tables"


def to_code_point(value):
    """Return *value* as an integer code point (characters go through ord)."""
    if isinstance(value, str):
        return ord(value)
    return value


def normalize_ranges(data):
    """Convert a class's "data" list into a sorted list of (first, last) pairs.

    Single characters and code points become one-element ranges.  A range
    that straddles 0x7f is split in two at that boundary, because the factory
    functions store members up to 127 and members above 127 in separate lists.
    """
    ranges = []
    for item in data:
        if isinstance(item, (str, int)):
            point = to_code_point(item)
            ranges.append((point, point))
            continue
        first, last = item
        first = to_code_point(first)
        last = to_code_point(last)
        if first <= ASCII_MAX < last:
            ranges.append((first, ASCII_MAX))
            first = ASCII_MAX + 1
        ranges.append((first, last))
    ranges.sort()
    return ranges


def build_table(name, ranges, entries_per_line=entriesPerLine):
    """Return the C array definition ``_<name>Data`` for *ranges*.

    The array has one entry per code point 0..0xffff: 1 for members, 0
    otherwise.  Entries are comma separated with a line break after every
    *entries_per_line* entries; the final entry has no trailing comma.
    """
    flags = ["0"] * TABLE_SIZE
    for first, last in ranges:
        flags[first:last + 1] = ["1"] * (last - first + 1)
    rows = [
        ",".join(flags[start:start + entries_per_line])
        for start in range(0, TABLE_SIZE, entries_per_line)
    ]
    return (
        ("static const char _%sData[65536] = {\n" % name)
        + ",\n".join(rows)
        + "\n};\n\n"
    )


def build_create_function(name, classes, ranges, emit_tables):
    """Return the C++ text of the ``<name>Create()`` factory function.

    When tables are emitted and the class uses one, the CharacterClass is
    constructed around the class's own table, or around the table of its
    "Inverse" class with the inverted flag set.  Every range is then appended
    to the matches/ranges list for values up to 127 or to the corresponding
    Unicode list for anything above 127.
    """
    lines = [
        "CharacterClass* %sCreate()\n" % name,
        "{\n",
    ]
    if emit_tables and classes["UseTable"]:
        if "Inverse" in classes:
            lines.append(" CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, true));\n" % (classes["Inverse"]))
        else:
            lines.append(" CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, false));\n" % (name))
    else:
        lines.append(" CharacterClass* characterClass = new CharacterClass(0);\n")

    for first, last in ranges:
        if first == last:
            if first > 127:
                lines.append(" characterClass->m_matchesUnicode.append(0x%04x);\n" % first)
            else:
                lines.append(" characterClass->m_matches.append(0x%02x);\n" % first)
        elif first > 127 or last > 127:
            lines.append(" characterClass->m_rangesUnicode.append(CharacterRange(0x%04x, 0x%04x));\n" % (first, last))
        else:
            lines.append(" characterClass->m_ranges.append(CharacterRange(0x%02x, 0x%02x));\n" % (first, last))

    lines.append(" return characterClass;\n")
    lines.append("}\n\n")
    return "".join(lines)


def generate(emit_tables):
    """Return ``(arrays, functions)``: the generated text for all classes.

    *arrays* holds the lookup tables (empty when *emit_tables* is false) and
    *functions* holds the factory functions, both in ``types`` iteration order.
    """
    arrays = []
    functions = []
    for name, classes in types.items():
        ranges = normalize_ranges(classes["data"])
        # Inverse classes reuse the table of the class they invert, so only
        # non-inverse table classes get an array of their own.
        if emit_tables and classes["UseTable"] and "Inverse" not in classes:
            arrays.append(build_table(name, ranges))
        functions.append(build_create_function(name, classes, ranges, emit_tables))
    return "".join(arrays), "".join(functions)


def parse_args(argv):
    """Return ``(emit_tables, output_path)`` for a ``sys.argv``-style list.

    Tables are suppressed only when the first argument is ``--no-tables``.
    The output path is the last argument left after removing that leading
    flag, or None when nothing is left, so passing the flag alone writes to
    standard output rather than to a file named after the flag.
    """
    args = list(argv[1:])
    emit_tables = not (args and args[0] == NO_TABLES_FLAG)
    if not emit_tables:
        args = args[1:]
    output_path = args[-1] if args else None
    return emit_tables, output_path


def main(argv=None):
    """Generate the tables and functions and write them out."""
    if argv is None:
        argv = sys.argv
    emit_tables, output_path = parse_args(argv)
    arrays, functions = generate(emit_tables)
    if output_path is None:
        print(arrays)
        print(functions)
        return
    # The context manager closes the file even if a write fails.
    with open(output_path, "w") as output:
        output.write(arrays)
        output.write(functions)


if __name__ == "__main__":
    main()