Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python3
      2 # This script converts a C file to use the PEP 384 type definition API
      3 # Usage: abitype.py < old_code > new_code
      4 import re, sys
      5 
      6 ###### Replacement of PyTypeObject static instances ##############
      7 
      8 # classify each token, giving it a one-letter code:
      9 # S: static
     10 # T: PyTypeObject
     11 # I: ident
     12 # W: whitespace
     13 # =, {, }, ; : themselves
     14 def classify():
     15     res = []
     16     for t,v in tokens:
     17         if t == 'other' and v in "={};":
     18             res.append(v)
     19         elif t == 'ident':
     20             if v == 'PyTypeObject':
     21                 res.append('T')
     22             elif v == 'static':
     23                 res.append('S')
     24             else:
     25                 res.append('I')
     26         elif t == 'ws':
     27             res.append('W')
     28         else:
     29             res.append('.')
     30     return ''.join(res)
     31 
     32 # Obtain a list of fields of a PyTypeObject, in declaration order,
     33 # skipping ob_base
     34 # All comments are dropped from the variable (which are typically
     35 # just the slot names, anyway), and information is discarded whether
     36 # the original type was static.
     37 def get_fields(start, real_end):
     38     pos = start
     39     # static?
     40     if tokens[pos][1] == 'static':
     41         pos += 2
     42     # PyTypeObject
     43     pos += 2
     44     # name
     45     name = tokens[pos][1]
     46     pos += 1
     47     while tokens[pos][1] != '{':
     48         pos += 1
     49     pos += 1
     50     # PyVarObject_HEAD_INIT
     51     while tokens[pos][0] in ('ws', 'comment'):
     52         pos += 1
     53     if tokens[pos][1] != 'PyVarObject_HEAD_INIT':
     54         raise Exception('%s has no PyVarObject_HEAD_INIT' % name)
     55     while tokens[pos][1] != ')':
     56         pos += 1
     57     pos += 1
     58     # field definitions: various tokens, comma-separated
     59     fields = []
     60     while True:
     61         while tokens[pos][0] in ('ws', 'comment'):
     62             pos += 1
     63         end = pos
     64         while tokens[end][1] not in ',}':
     65             if tokens[end][1] == '(':
     66                 nesting = 1
     67                 while nesting:
     68                     end += 1
     69                     if tokens[end][1] == '(': nesting+=1
     70                     if tokens[end][1] == ')': nesting-=1
     71             end += 1
     72         assert end < real_end
     73         # join field, excluding separator and trailing ws
     74         end1 = end-1
     75         while tokens[end1][0] in ('ws', 'comment'):
     76             end1 -= 1
     77         fields.append(''.join(t[1] for t in tokens[pos:end1+1]))
     78         if tokens[end][1] == '}':
     79             break
     80         pos = end+1
     81     return name, fields
     82 
     83 # List of type slots as of Python 3.2, omitting ob_base
     84 typeslots = [
     85     'tp_name',
     86     'tp_basicsize',
     87     'tp_itemsize',
     88     'tp_dealloc',
     89     'tp_print',
     90     'tp_getattr',
     91     'tp_setattr',
     92     'tp_reserved',
     93     'tp_repr',
     94     'tp_as_number',
     95     'tp_as_sequence',
     96     'tp_as_mapping',
     97     'tp_hash',
     98     'tp_call',
     99     'tp_str',
    100     'tp_getattro',
    101     'tp_setattro',
    102     'tp_as_buffer',
    103     'tp_flags',
    104     'tp_doc',
    105     'tp_traverse',
    106     'tp_clear',
    107     'tp_richcompare',
    108     'tp_weaklistoffset',
    109     'tp_iter',
    110     'iternextfunc',
    111     'tp_methods',
    112     'tp_members',
    113     'tp_getset',
    114     'tp_base',
    115     'tp_dict',
    116     'tp_descr_get',
    117     'tp_descr_set',
    118     'tp_dictoffset',
    119     'tp_init',
    120     'tp_alloc',
    121     'tp_new',
    122     'tp_free',
    123     'tp_is_gc',
    124     'tp_bases',
    125     'tp_mro',
    126     'tp_cache',
    127     'tp_subclasses',
    128     'tp_weaklist',
    129     'tp_del',
    130     'tp_version_tag',
    131 ]
    132 
    133 # Generate a PyType_Spec definition
    134 def make_slots(name, fields):
    135     res = []
    136     res.append('static PyType_Slot %s_slots[] = {' % name)
    137     # defaults for spec
    138     spec = { 'tp_itemsize':'0' }
    139     for i, val in enumerate(fields):
    140         if val.endswith('0'):
    141             continue
    142         if typeslots[i] in ('tp_name', 'tp_doc', 'tp_basicsize',
    143                          'tp_itemsize', 'tp_flags'):
    144             spec[typeslots[i]] = val
    145             continue
    146         res.append('    {Py_%s, %s},' % (typeslots[i], val))
    147     res.append('};')
    148     res.append('static PyType_Spec %s_spec = {' % name)
    149     res.append('    %s,' % spec['tp_name'])
    150     res.append('    %s,' % spec['tp_basicsize'])
    151     res.append('    %s,' % spec['tp_itemsize'])
    152     res.append('    %s,' % spec['tp_flags'])
    153     res.append('    %s_slots,' % name)
    154     res.append('};\n')
    155     return '\n'.join(res)
    156 
    157 
    158 if __name__ == '__main__':
    159 
    160     ############ Simplistic C scanner ##################################
    161     tokenizer = re.compile(
    162         r"(?P<preproc>#.*\n)"
    163         r"|(?P<comment>/\*.*?\*/)"
    164         r"|(?P<ident>[a-zA-Z_][a-zA-Z0-9_]*)"
    165         r"|(?P<ws>[ \t\n]+)"
    166         r"|(?P<other>.)",
    167         re.MULTILINE)
    168 
    169     tokens = []
    170     source = sys.stdin.read()
    171     pos = 0
    172     while pos != len(source):
    173         m = tokenizer.match(source, pos)
    174         tokens.append([m.lastgroup, m.group()])
    175         pos += len(tokens[-1][1])
    176         if tokens[-1][0] == 'preproc':
    177             # continuation lines are considered
    178             # only in preprocess statements
    179             while tokens[-1][1].endswith('\\\n'):
    180                 nl = source.find('\n', pos)
    181                 if nl == -1:
    182                     line = source[pos:]
    183                 else:
    184                     line = source[pos:nl+1]
    185                 tokens[-1][1] += line
    186                 pos += len(line)
    187 
    188     # Main loop: replace all static PyTypeObjects until
    189     # there are none left.
    190     while 1:
    191         c = classify()
    192         m = re.search('(SW)?TWIW?=W?{.*?};', c)
    193         if not m:
    194             break
    195         start = m.start()
    196         end = m.end()
    197         name, fields = get_fields(start, end)
    198         tokens[start:end] = [('',make_slots(name, fields))]
    199 
    200     # Output result to stdout
    201     for t, v in tokens:
    202         sys.stdout.write(v)
    203