# (source-browser navigation residue) Home | History | Annotate | Download | only in JavaScriptCore
      1 # Copyright (C) 2010 Apple Inc. All rights reserved.
      2 # 
      3 # Redistribution and use in source and binary forms, with or without
      4 # modification, are permitted provided that the following conditions
      5 # are met:
      6 # 1. Redistributions of source code must retain the above copyright
      7 #    notice, this list of conditions and the following disclaimer.
      8 # 2. Redistributions in binary form must reproduce the above copyright
      9 #    notice, this list of conditions and the following disclaimer in the
     10 #    documentation and/or other materials provided with the distribution.
     11 # 
     12 # THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     13 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     14 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     15 # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     16 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     17 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     18 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     19 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     20 # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     21 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     22 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
     23 
     24 import sys
     25 
     26 types = {
     27     "wordchar": { "UseTable" : True, "data": ['_', ('0','9'), ('A', 'Z'), ('a','z')]},
     28     "nonwordchar": { "UseTable" : True, "Inverse": "wordchar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0xffff)]},
     29     "newline": { "UseTable" : False, "data": ['\n', '\r', 0x2028, 0x2029]},
     30     "spaces": { "UseTable" : True, "data": [' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000, (0x2000, 0x200a), 0xfeff]},
     31     "nonspaces": { "UseTable" : True, "Inverse": "spaces", "data": [(0, ord('\t') - 1), (ord('\r') + 1, ord(' ') - 1), (ord(' ') + 1, 0x009f), (0x00a1, 0x167f), (0x1681, 0x180d), (0x180f, 0x1fff), (0x200b, 0x2027), (0x202a, 0x202e), (0x2030, 0x205e), (0x2060, 0x2fff), (0x3001, 0xfefe), (0xff00, 0xffff)]},
     32     "digits": { "UseTable" : False, "data": [('0', '9')]},
     33     "nondigits": { "UseTable" : False, "Inverse": "digits", "data": [(0, ord('0') - 1), (ord('9') + 1, 0xffff)] }
     34 }
     35 entriesPerLine = 50
     36 arrays = "";
     37 functions = "";
     38 emitTables = (len(sys.argv) < 2 or sys.argv[1] != "--no-tables")
     39 
     40 for name, classes in types.items():
     41     ranges = [];
     42     size = 0;
     43     for _class in classes["data"]:
     44         if type(_class) == str:
     45             ranges.append((ord(_class), ord(_class)))
     46         elif type(_class) == int:
     47             ranges.append((_class, _class))
     48         else:
     49             (min, max) = _class;
     50             if type(min) == str:
     51                 min = ord(min)
     52             if type(max) == str:
     53                 max = ord(max)
     54             if max > 0x7f and min <= 0x7f:
     55                 ranges.append((min, 0x7f))
     56                 min = 0x80
     57             ranges.append((min,max))
     58     ranges.sort();
     59     
     60     if emitTables and classes["UseTable"] and (not "Inverse" in classes):
     61         array = ("static const char _%sData[65536] = {\n" % name);
     62         i = 0
     63         for (min,max) in ranges:
     64             while i < min:
     65                 i = i + 1
     66                 array += ('0,')
     67                 if (i % entriesPerLine == 0) and (i != 0):
     68                     array += ('\n')
     69             while i <= max:
     70                 i = i + 1
     71                 if (i == 65536):
     72                     array += ("1")
     73                 else:
     74                     array += ('1,')
     75                 if (i % entriesPerLine == 0) and (i != 0):
     76                     array += ('\n')
     77         while i < 0xffff:
     78             array += ("0,")
     79             i = i + 1;
     80             if (i % entriesPerLine == 0) and (i != 0):
     81                 array += ('\n')
     82         if i == 0xffff:
     83             array += ("0")
     84         array += ("\n};\n\n");
     85         arrays += array
     86     
     87     # Generate createFunction:
     88     function = "";
     89     function += ("CharacterClass* %sCreate()\n" % name)
     90     function += ("{\n")
     91     if emitTables and classes["UseTable"]:
     92         if "Inverse" in classes:
     93             function += ("    CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, true));\n" % (classes["Inverse"]))
     94         else:
     95             function += ("    CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, false));\n" % (name))
     96     else:
     97         function += ("    CharacterClass* characterClass = new CharacterClass(0);\n")
     98     for (min, max) in ranges:
     99         if (min == max):
    100             if (min > 127):
    101                 function += ("    characterClass->m_matchesUnicode.append(0x%04x);\n" % min)
    102             else:
    103                 function += ("    characterClass->m_matches.append(0x%02x);\n" % min)
    104             continue
    105         if (min > 127) or (max > 127):
    106             function += ("    characterClass->m_rangesUnicode.append(CharacterRange(0x%04x, 0x%04x));\n" % (min, max))
    107         else:
    108             function += ("    characterClass->m_ranges.append(CharacterRange(0x%02x, 0x%02x));\n" % (min, max))
    109     function += ("    return characterClass;\n")
    110     function += ("}\n\n")
    111     functions += function
    112 
    113 if (len(sys.argv) > 1):
    114     f = open(sys.argv[-1], "w")
    115     f.write(arrays)
    116     f.write(functions)
    117     f.close()
    118 else:
    119     print(arrays)
    120     print(functions)
    121 
    122