Home | History | Annotate | Download | only in mterp
      1 #!/usr/bin/env python
      2 #
      3 # Copyright (C) 2007 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 #
     18 # Using instructions from an architecture-specific config file, generate C
     19 # and assembly source files for the Dalvik interpreter.
     20 #
     21 
     22 import sys, string, re, time
     23 from string import Template
     24 
     25 interp_defs_file = "../../libdex/DexOpcodes.h" # need opcode list
     26 kNumPackedOpcodes = 256 # TODO: Derive this from DexOpcodes.h.
     27 
     28 splitops = False
     29 verbose = False
     30 handler_size_bits = -1000
     31 handler_size_bytes = -1000
     32 in_op_start = 0             # 0=not started, 1=started, 2=ended
     33 in_alt_op_start = 0         # 0=not started, 1=started, 2=ended
     34 default_op_dir = None
     35 default_alt_stub = None
     36 opcode_locations = {}
     37 alt_opcode_locations = {}
     38 asm_stub_text = []
     39 label_prefix = ".L"         # use ".L" to hide labels from gdb
     40 alt_label_prefix = ".L_ALT" # use ".L" to hide labels from gdb
     41 style = None                # interpreter style
     42 generate_alt_table = False
     43 
     44 # Exception class.
     45 class DataParseError(SyntaxError):
     46     "Failure when parsing data file"
     47 
     48 #
     49 # Set any omnipresent substitution values.
     50 #
     51 def getGlobalSubDict():
     52     return { "handler_size_bits":handler_size_bits,
     53              "handler_size_bytes":handler_size_bytes }
     54 
     55 #
     56 # Parse arch config file --
     57 # Set interpreter style.
     58 #
     59 def setHandlerStyle(tokens):
     60     global style
     61     if len(tokens) != 2:
     62         raise DataParseError("handler-style requires one argument")
     63     style = tokens[1]
     64     if style != "computed-goto" and style != "jump-table" and style != "all-c":
     65         raise DataParseError("handler-style (%s) invalid" % style)
     66 
     67 #
     68 # Parse arch config file --
     69 # Set handler_size_bytes to the value of tokens[1], and handler_size_bits to
     70 # log2(handler_size_bytes).  Throws an exception if "bytes" is zero or
     71 # is not a power of two.
     72 #
     73 def setHandlerSize(tokens):
     74     global handler_size_bits, handler_size_bytes
     75     if style != "computed-goto":
     76         print "Warning: handler-size valid only for computed-goto interpreters"
     77     if len(tokens) != 2:
     78         raise DataParseError("handler-size requires one argument")
     79     if handler_size_bits != -1000:
     80         raise DataParseError("handler-size may only be set once")
     81 
     82     # compute log2(n), and make sure n is 0 or a power of 2
     83     handler_size_bytes = bytes = int(tokens[1])
     84     bits = -1
     85     while bytes > 0:
     86         bytes //= 2     # halve with truncating division
     87         bits += 1
     88 
     89     if handler_size_bytes == 0 or handler_size_bytes != (1 << bits):
     90         raise DataParseError("handler-size (%d) must be power of 2" \
     91                 % orig_bytes)
     92     handler_size_bits = bits
     93 
     94 #
     95 # Parse arch config file --
     96 # Copy a file in to the C or asm output file.
     97 #
     98 def importFile(tokens):
     99     if len(tokens) != 2:
    100         raise DataParseError("import requires one argument")
    101     source = tokens[1]
    102     if source.endswith(".cpp"):
    103         appendSourceFile(tokens[1], getGlobalSubDict(), c_fp, None)
    104     elif source.endswith(".S"):
    105         appendSourceFile(tokens[1], getGlobalSubDict(), asm_fp, None)
    106     else:
    107         raise DataParseError("don't know how to import %s (expecting .cpp/.S)"
    108                 % source)
    109 
    110 #
    111 # Parse arch config file --
    112 # Load the asm stub template that is emitted for opcodes implemented in C.
    113 #
    114 def setAsmStub(tokens):
    115     global asm_stub_text
    116     if style == "all-c":
    117         print "Warning: asm-stub ignored for all-c interpreter"
    118     if len(tokens) != 2:
    119         raise DataParseError("import requires one argument")
    120     try:
    121         stub_fp = open(tokens[1])
    122         asm_stub_text = stub_fp.readlines()
    123     except IOError, err:
    124         stub_fp.close()
    125         raise DataParseError("unable to load asm-stub: %s" % str(err))
    126     stub_fp.close()
    127 
    128 #
    129 # Parse arch config file --
    130 # Record location of default alt stub
    131 #
    132 def setAsmAltStub(tokens):
    133     global default_alt_stub, generate_alt_table
    134     if style == "all-c":
    135         print "Warning: asm-alt-stub ingored for all-c interpreter"
    136     if len(tokens) != 2:
    137         raise DataParseError("import requires one argument")
    138     default_alt_stub = tokens[1]
    139     generate_alt_table = True
    140 
    141 #
    142 # Parse arch config file --
    143 # Start of opcode list.
    144 #
    145 def opStart(tokens):
    146     global in_op_start
    147     global default_op_dir
    148     if len(tokens) != 2:
    149         raise DataParseError("opStart takes a directory name argument")
    150     if in_op_start != 0:
    151         raise DataParseError("opStart can only be specified once")
    152     default_op_dir = tokens[1]
    153     in_op_start = 1
    154 
    155 #
    156 # Parse arch config file --
    157 # Set location of a single alt opcode's source file.
    158 #
    159 def altEntry(tokens):
    160     global generate_alt_table
    161     if len(tokens) != 3:
    162         raise DataParseError("alt requires exactly two arguments")
    163     if in_op_start != 1:
    164         raise DataParseError("alt statements must be between opStart/opEnd")
    165     try:
    166         index = opcodes.index(tokens[1])
    167     except ValueError:
    168         raise DataParseError("unknown opcode %s" % tokens[1])
    169     if alt_opcode_locations.has_key(tokens[1]):
    170         print "Note: alt overrides earlier %s (%s -> %s)" \
    171                 % (tokens[1], alt_opcode_locations[tokens[1]], tokens[2])
    172     alt_opcode_locations[tokens[1]] = tokens[2]
    173     generate_alt_table = True
    174 
    175 #
    176 # Parse arch config file --
    177 # Set location of a single opcode's source file.
    178 #
    179 def opEntry(tokens):
    180     #global opcode_locations
    181     if len(tokens) != 3:
    182         raise DataParseError("op requires exactly two arguments")
    183     if in_op_start != 1:
    184         raise DataParseError("op statements must be between opStart/opEnd")
    185     try:
    186         index = opcodes.index(tokens[1])
    187     except ValueError:
    188         raise DataParseError("unknown opcode %s" % tokens[1])
    189     if opcode_locations.has_key(tokens[1]):
    190         print "Note: op overrides earlier %s (%s -> %s)" \
    191                 % (tokens[1], opcode_locations[tokens[1]], tokens[2])
    192     opcode_locations[tokens[1]] = tokens[2]
    193 
    194 #
    195 # Emit jump table
    196 #
    197 def emitJmpTable(start_label, prefix):
    198     asm_fp.write("\n    .global %s\n" % start_label)
    199     asm_fp.write("    .text\n")
    200     asm_fp.write("%s:\n" % start_label)
    201     for i in xrange(kNumPackedOpcodes):
    202         op = opcodes[i]
    203         dict = getGlobalSubDict()
    204         dict.update({ "opcode":op, "opnum":i })
    205         asm_fp.write("    .long " + prefix + \
    206                      "_%(opcode)s /* 0x%(opnum)02x */\n" % dict)
    207 
    208 #
    209 # Parse arch config file --
    210 # End of opcode list; emit instruction blocks.
    211 #
    212 def opEnd(tokens):
    213     global in_op_start
    214     if len(tokens) != 1:
    215         raise DataParseError("opEnd takes no arguments")
    216     if in_op_start != 1:
    217         raise DataParseError("opEnd must follow opStart, and only appear once")
    218     in_op_start = 2
    219 
    220     loadAndEmitOpcodes()
    221     if splitops == False:
    222         if generate_alt_table:
    223             loadAndEmitAltOpcodes()
    224             if style == "jump-table":
    225                 emitJmpTable("dvmAsmInstructionStart", label_prefix);
    226                 emitJmpTable("dvmAsmAltInstructionStart", alt_label_prefix);
    227 
    228 def genaltop(tokens):
    229     if in_op_start != 2:
    230        raise DataParseError("alt-op can be specified only after op-end")
    231     if len(tokens) != 1:
    232         raise DataParseError("opEnd takes no arguments")
    233     if generate_alt_table:
    234         loadAndEmitAltOpcodes()
    235         if style == "jump-table":
    236             emitJmpTable("dvmAsmInstructionStart", label_prefix);
    237             emitJmpTable("dvmAsmAltInstructionStart", alt_label_prefix);
    238 
    239 
    240 #
    241 # Extract an ordered list of instructions from the VM sources.  We use the
    242 # "goto table" definition macro, which has exactly kNumPackedOpcodes
    243 # entries.
    244 #
    245 def getOpcodeList():
    246     opcodes = []
    247     opcode_fp = open(interp_defs_file)
    248     opcode_re = re.compile(r"^\s*H\(OP_(\w+)\),.*", re.DOTALL)
    249     for line in opcode_fp:
    250         match = opcode_re.match(line)
    251         if not match:
    252             continue
    253         opcodes.append("OP_" + match.group(1))
    254     opcode_fp.close()
    255 
    256     if len(opcodes) != kNumPackedOpcodes:
    257         print "ERROR: found %d opcodes in Interp.h (expected %d)" \
    258                 % (len(opcodes), kNumPackedOpcodes)
    259         raise SyntaxError, "bad opcode count"
    260     return opcodes
    261 
    262 def emitAlign():
    263     if style == "computed-goto":
    264         asm_fp.write("    .balign %d\n" % handler_size_bytes)
    265 
    266 #
    267 # Load and emit opcodes for all kNumPackedOpcodes instructions.
    268 #
    269 def loadAndEmitOpcodes():
    270     sister_list = []
    271     assert len(opcodes) == kNumPackedOpcodes
    272     need_dummy_start = False
    273     if style == "jump-table":
    274         start_label = "dvmAsmInstructionStartCode"
    275         end_label = "dvmAsmInstructionEndCode"
    276     else:
    277         start_label = "dvmAsmInstructionStart"
    278         end_label = "dvmAsmInstructionEnd"
    279 
    280     # point dvmAsmInstructionStart at the first handler or stub
    281     asm_fp.write("\n    .global %s\n" % start_label)
    282     asm_fp.write("    .type   %s, %%function\n" % start_label)
    283     asm_fp.write("%s = " % start_label + label_prefix + "_OP_NOP\n")
    284     asm_fp.write("    .text\n\n")
    285 
    286     for i in xrange(kNumPackedOpcodes):
    287         op = opcodes[i]
    288 
    289         if opcode_locations.has_key(op):
    290             location = opcode_locations[op]
    291         else:
    292             location = default_op_dir
    293 
    294         if location == "c":
    295             loadAndEmitC(location, i)
    296             if len(asm_stub_text) == 0:
    297                 need_dummy_start = True
    298         else:
    299             loadAndEmitAsm(location, i, sister_list)
    300 
    301     # For a 100% C implementation, there are no asm handlers or stubs.  We
    302     # need to have the dvmAsmInstructionStart label point at OP_NOP, and it's
    303     # too annoying to try to slide it in after the alignment psuedo-op, so
    304     # we take the low road and just emit a dummy OP_NOP here.
    305     if need_dummy_start:
    306         emitAlign()
    307         asm_fp.write(label_prefix + "_OP_NOP:   /* dummy */\n");
    308 
    309     emitAlign()
    310     asm_fp.write("    .size   %s, .-%s\n" % (start_label, start_label))
    311     asm_fp.write("    .global %s\n" % end_label)
    312     asm_fp.write("%s:\n" % end_label)
    313 
    314     if style == "computed-goto":
    315         emitSectionComment("Sister implementations", asm_fp)
    316         asm_fp.write("    .global dvmAsmSisterStart\n")
    317         asm_fp.write("    .type   dvmAsmSisterStart, %function\n")
    318         asm_fp.write("    .text\n")
    319         asm_fp.write("    .balign 4\n")
    320         asm_fp.write("dvmAsmSisterStart:\n")
    321         asm_fp.writelines(sister_list)
    322         asm_fp.write("\n    .size   dvmAsmSisterStart, .-dvmAsmSisterStart\n")
    323         asm_fp.write("    .global dvmAsmSisterEnd\n")
    324         asm_fp.write("dvmAsmSisterEnd:\n\n")
    325 
    326 #
    327 # Load an alternate entry stub
    328 #
    329 def loadAndEmitAltStub(source, opindex):
    330     op = opcodes[opindex]
    331     if verbose:
    332         print " alt emit %s --> stub" % source
    333     dict = getGlobalSubDict()
    334     dict.update({ "opcode":op, "opnum":opindex })
    335 
    336     emitAsmHeader(asm_fp, dict, alt_label_prefix)
    337     appendSourceFile(source, dict, asm_fp, None)
    338 
    339 #
    340 # Load and emit alternate opcodes for all kNumPackedOpcodes instructions.
    341 #
    342 def loadAndEmitAltOpcodes():
    343     assert len(opcodes) == kNumPackedOpcodes
    344     if style == "jump-table":
    345         start_label = "dvmAsmAltInstructionStartCode"
    346         end_label = "dvmAsmAltInstructionEndCode"
    347     else:
    348         start_label = "dvmAsmAltInstructionStart"
    349         end_label = "dvmAsmAltInstructionEnd"
    350 
    351     # point dvmAsmInstructionStart at the first handler or stub
    352     asm_fp.write("\n    .global %s\n" % start_label)
    353     asm_fp.write("    .type   %s, %%function\n" % start_label)
    354     asm_fp.write("    .text\n\n")
    355     asm_fp.write("%s = " % start_label + label_prefix + "_ALT_OP_NOP\n")
    356 
    357     for i in xrange(kNumPackedOpcodes):
    358         op = opcodes[i]
    359         if alt_opcode_locations.has_key(op):
    360             source = "%s/ALT_%s.S" % (alt_opcode_locations[op], op)
    361         else:
    362             source = default_alt_stub
    363         loadAndEmitAltStub(source, i)
    364 
    365     emitAlign()
    366     asm_fp.write("    .size   %s, .-%s\n" % (start_label, start_label))
    367     asm_fp.write("    .global %s\n" % end_label)
    368     asm_fp.write("%s:\n" % end_label)
    369 
    370 #
    371 # Load a C fragment and emit it, then output an assembly stub.
    372 #
    373 def loadAndEmitC(location, opindex):
    374     op = opcodes[opindex]
    375     source = "%s/%s.cpp" % (location, op)
    376     if verbose:
    377         print " emit %s --> C++" % source
    378     dict = getGlobalSubDict()
    379     dict.update({ "opcode":op, "opnum":opindex })
    380 
    381     appendSourceFile(source, dict, c_fp, None)
    382 
    383     if len(asm_stub_text) != 0:
    384         emitAsmStub(asm_fp, dict)
    385 
    386 #
    387 # Load an assembly fragment and emit it.
    388 #
    389 def loadAndEmitAsm(location, opindex, sister_list):
    390     op = opcodes[opindex]
    391     source = "%s/%s.S" % (location, op)
    392     dict = getGlobalSubDict()
    393     dict.update({ "opcode":op, "opnum":opindex })
    394     if verbose:
    395         print " emit %s --> asm" % source
    396 
    397     emitAsmHeader(asm_fp, dict, label_prefix)
    398     appendSourceFile(source, dict, asm_fp, sister_list)
    399 
    400 #
    401 # Output the alignment directive and label for an assembly piece.
    402 #
    403 def emitAsmHeader(outfp, dict, prefix):
    404     outfp.write("/* ------------------------------ */\n")
    405     # The alignment directive ensures that the handler occupies
    406     # at least the correct amount of space.  We don't try to deal
    407     # with overflow here.
    408     emitAlign()
    409     # Emit a label so that gdb will say the right thing.  We prepend an
    410     # underscore so the symbol name doesn't clash with the Opcode enum.
    411     outfp.write(prefix + "_%(opcode)s: /* 0x%(opnum)02x */\n" % dict)
    412 
    413 #
    414 # Output a generic instruction stub that updates the "glue" struct and
    415 # calls the C implementation.
    416 #
    417 def emitAsmStub(outfp, dict):
    418     emitAsmHeader(outfp, dict, label_prefix)
    419     for line in asm_stub_text:
    420         templ = Template(line)
    421         outfp.write(templ.substitute(dict))
    422 
    423 #
    424 # Append the file specified by "source" to the open "outfp".  Each line will
    425 # be template-replaced using the substitution dictionary "dict".
    426 #
    427 # If the first line of the file starts with "%" it is taken as a directive.
    428 # A "%include" line contains a filename and, optionally, a Python-style
    429 # dictionary declaration with substitution strings.  (This is implemented
    430 # with recursion.)
    431 #
    432 # If "sister_list" is provided and the handler style is "computed-goto",
    433 # then after the first line that starts with "%break" all subsequent lines
    434 # from the file are appended to sister_list instead of copied to the output.
    435 #
    436 # This may modify "dict".
    437 #
    438 def appendSourceFile(source, dict, outfp, sister_list):
    439     outfp.write("/* File: %s */\n" % source)
    440     infp = open(source, "r")
    441     in_sister = False
    442     for line in infp:
    443         if line.startswith("%include"):
    444             # Parse the "include" line
    445             tokens = line.strip().split(' ', 2)
    446             if len(tokens) < 2:
    447                 raise DataParseError("malformed %%include in %s" % source)
    448 
    449             alt_source = tokens[1].strip("\"")
    450             if alt_source == source:
    451                 raise DataParseError("self-referential %%include in %s"
    452                         % source)
    453 
    454             new_dict = dict.copy()
    455             if len(tokens) == 3:
    456                 new_dict.update(eval(tokens[2]))
    457             #print " including src=%s dict=%s" % (alt_source, new_dict)
    458             appendSourceFile(alt_source, new_dict, outfp, sister_list)
    459             continue
    460 
    461         elif line.startswith("%default"):
    462             # copy keywords into dictionary
    463             tokens = line.strip().split(' ', 1)
    464             if len(tokens) < 2:
    465                 raise DataParseError("malformed %%default in %s" % source)
    466             defaultValues = eval(tokens[1])
    467             for entry in defaultValues:
    468                 dict.setdefault(entry, defaultValues[entry])
    469             continue
    470 
    471         elif line.startswith("%verify"):
    472             # more to come, someday
    473             continue
    474 
    475         elif line.startswith("%break") and sister_list != None:
    476             # allow more than one %break, ignoring all following the first
    477             if style == "computed-goto" and not in_sister:
    478                 in_sister = True
    479                 sister_list.append("\n/* continuation for %(opcode)s */\n"%dict)
    480             continue
    481 
    482         # perform keyword substitution if a dictionary was provided
    483         if dict != None:
    484             templ = Template(line)
    485             try:
    486                 subline = templ.substitute(dict)
    487             except KeyError, err:
    488                 raise DataParseError("keyword substitution failed in %s: %s"
    489                         % (source, str(err)))
    490             except:
    491                 print "ERROR: substitution failed: " + line
    492                 raise
    493         else:
    494             subline = line
    495 
    496         # write output to appropriate file
    497         if in_sister:
    498             sister_list.append(subline)
    499         else:
    500             outfp.write(subline)
    501     outfp.write("\n")
    502     infp.close()
    503 
    504 #
    505 # Emit a C-style section header comment.
    506 #
    507 def emitSectionComment(str, fp):
    508     equals = "========================================" \
    509              "==================================="
    510 
    511     fp.write("\n/*\n * %s\n *  %s\n * %s\n */\n" %
    512         (equals, str, equals))
    513 
    514 
    515 #
    516 # ===========================================================================
    517 # "main" code
    518 #
    519 
    520 #
    521 # Check args.
    522 #
    523 if len(sys.argv) != 3:
    524     print "Usage: %s target-arch output-dir" % sys.argv[0]
    525     sys.exit(2)
    526 
    527 target_arch = sys.argv[1]
    528 output_dir = sys.argv[2]
    529 
    530 #
    531 # Extract opcode list.
    532 #
    533 opcodes = getOpcodeList()
    534 #for op in opcodes:
    535 #    print "  %s" % op
    536 
    537 #
    538 # Open config file.
    539 #
    540 try:
    541     config_fp = open("config-%s" % target_arch)
    542 except:
    543     print "Unable to open config file 'config-%s'" % target_arch
    544     sys.exit(1)
    545 
    546 #
    547 # Open and prepare output files.
    548 #
    549 try:
    550     c_fp = open("%s/InterpC-%s.cpp" % (output_dir, target_arch), "w")
    551     asm_fp = open("%s/InterpAsm-%s.S" % (output_dir, target_arch), "w")
    552 except:
    553     print "Unable to open output files"
    554     print "Make sure directory '%s' exists and existing files are writable" \
    555             % output_dir
    556     # Ideally we'd remove the files to avoid confusing "make", but if they
    557     # failed to open we probably won't be able to remove them either.
    558     sys.exit(1)
    559 
    560 print "Generating %s, %s" % (c_fp.name, asm_fp.name)
    561 
    562 file_header = """/*
    563  * This file was generated automatically by gen-mterp.py for '%s'.
    564  *
    565  * --> DO NOT EDIT <--
    566  */
    567 
    568 """ % (target_arch)
    569 
    570 c_fp.write(file_header)
    571 asm_fp.write(file_header)
    572 
    573 #
    574 # Process the config file.
    575 #
    576 failed = False
    577 try:
    578     for line in config_fp:
    579         line = line.strip()         # remove CRLF, leading spaces
    580         tokens = line.split(' ')    # tokenize
    581         #print "%d: %s" % (len(tokens), tokens)
    582         if len(tokens[0]) == 0:
    583             #print "  blank"
    584             pass
    585         elif tokens[0][0] == '#':
    586             #print "  comment"
    587             pass
    588         else:
    589             if tokens[0] == "handler-size":
    590                 setHandlerSize(tokens)
    591             elif tokens[0] == "import":
    592                 importFile(tokens)
    593             elif tokens[0] == "asm-stub":
    594                 setAsmStub(tokens)
    595             elif tokens[0] == "asm-alt-stub":
    596                 setAsmAltStub(tokens)
    597             elif tokens[0] == "op-start":
    598                 opStart(tokens)
    599             elif tokens[0] == "op-end":
    600                 opEnd(tokens)
    601             elif tokens[0] == "alt":
    602                 altEntry(tokens)
    603             elif tokens[0] == "op":
    604                 opEntry(tokens)
    605             elif tokens[0] == "handler-style":
    606                 setHandlerStyle(tokens)
    607             elif tokens[0] == "alt-ops":
    608                 genaltop(tokens)
    609             elif tokens[0] == "split-ops":
    610                 splitops = True
    611             else:
    612                 raise DataParseError, "unrecognized command '%s'" % tokens[0]
    613             if style == None:
    614                 print "tokens[0] = %s" % tokens[0]
    615                 raise DataParseError, "handler-style must be first command"
    616 except DataParseError, err:
    617     print "Failed: " + str(err)
    618     # TODO: remove output files so "make" doesn't get confused
    619     failed = True
    620     c_fp.close()
    621     asm_fp.close()
    622     c_fp = asm_fp = None
    623 
    624 config_fp.close()
    625 
    626 #
    627 # Done!
    628 #
    629 if c_fp:
    630     c_fp.close()
    631 if asm_fp:
    632     asm_fp.close()
    633 
    634 sys.exit(failed)
    635