Home | History | Annotate | Download | only in trace
      1 #!/usr/bin/env python
      2 ##########################################################################
      3 # 
      4 # Copyright 2008 VMware, Inc.
      5 # All Rights Reserved.
      6 # 
      7 # Permission is hereby granted, free of charge, to any person obtaining a
      8 # copy of this software and associated documentation files (the
      9 # "Software"), to deal in the Software without restriction, including
     10 # without limitation the rights to use, copy, modify, merge, publish,
     11 # distribute, sub license, and/or sell copies of the Software, and to
     12 # permit persons to whom the Software is furnished to do so, subject to
     13 # the following conditions:
     14 # 
     15 # The above copyright notice and this permission notice (including the
     16 # next paragraph) shall be included in all copies or substantial portions
     17 # of the Software.
     18 # 
     19 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     20 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     22 # IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     23 # ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     24 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     25 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     26 # 
     27 ##########################################################################
     28 
     29 
     30 import sys
     31 import xml.parsers.expat
     32 import optparse
     33 
     34 from model import *
     35 
     36 
     37 ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF = range(4)
     38 
     39 
     40 class XmlToken:
     41 
     42     def __init__(self, type, name_or_data, attrs = None, line = None, column = None):
     43         assert type in (ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF)
     44         self.type = type
     45         self.name_or_data = name_or_data
     46         self.attrs = attrs
     47         self.line = line
     48         self.column = column
     49 
     50     def __str__(self):
     51         if self.type == ELEMENT_START:
     52             return '<' + self.name_or_data + ' ...>'
     53         if self.type == ELEMENT_END:
     54             return '</' + self.name_or_data + '>'
     55         if self.type == CHARACTER_DATA:
     56             return self.name_or_data
     57         if self.type == EOF:
     58             return 'end of file'
     59         assert 0
     60 
     61 
     62 class XmlTokenizer:
     63     """Expat based XML tokenizer."""
     64 
     65     def __init__(self, fp, skip_ws = True):
     66         self.fp = fp
     67         self.tokens = []
     68         self.index = 0
     69         self.final = False
     70         self.skip_ws = skip_ws
     71         
     72         self.character_pos = 0, 0
     73         self.character_data = ''
     74         
     75         self.parser = xml.parsers.expat.ParserCreate()
     76         self.parser.StartElementHandler  = self.handle_element_start
     77         self.parser.EndElementHandler    = self.handle_element_end
     78         self.parser.CharacterDataHandler = self.handle_character_data
     79     
     80     def handle_element_start(self, name, attributes):
     81         self.finish_character_data()
     82         line, column = self.pos()
     83         token = XmlToken(ELEMENT_START, name, attributes, line, column)
     84         self.tokens.append(token)
     85     
     86     def handle_element_end(self, name):
     87         self.finish_character_data()
     88         line, column = self.pos()
     89         token = XmlToken(ELEMENT_END, name, None, line, column)
     90         self.tokens.append(token)
     91 
     92     def handle_character_data(self, data):
     93         if not self.character_data:
     94             self.character_pos = self.pos()
     95         self.character_data += data
     96     
     97     def finish_character_data(self):
     98         if self.character_data:
     99             if not self.skip_ws or not self.character_data.isspace(): 
    100                 line, column = self.character_pos
    101                 token = XmlToken(CHARACTER_DATA, self.character_data, None, line, column)
    102                 self.tokens.append(token)
    103             self.character_data = ''
    104     
    105     def next(self):
    106         size = 16*1024
    107         while self.index >= len(self.tokens) and not self.final:
    108             self.tokens = []
    109             self.index = 0
    110             data = self.fp.read(size)
    111             self.final = len(data) < size
    112             data = data.rstrip('\0')
    113             try:
    114                 self.parser.Parse(data, self.final)
    115             except xml.parsers.expat.ExpatError, e:
    116                 #if e.code == xml.parsers.expat.errors.XML_ERROR_NO_ELEMENTS:
    117                 if e.code == 3:
    118                     pass
    119                 else:
    120                     raise e
    121         if self.index >= len(self.tokens):
    122             line, column = self.pos()
    123             token = XmlToken(EOF, None, None, line, column)
    124         else:
    125             token = self.tokens[self.index]
    126             self.index += 1
    127         return token
    128 
    129     def pos(self):
    130         return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber
    131 
    132 
    133 class TokenMismatch(Exception):
    134 
    135     def __init__(self, expected, found):
    136         self.expected = expected
    137         self.found = found
    138 
    139     def __str__(self):
    140         return '%u:%u: %s expected, %s found' % (self.found.line, self.found.column, str(self.expected), str(self.found))
    141 
    142 
    143 
    144 class XmlParser:
    145     """Base XML document parser."""
    146 
    147     def __init__(self, fp):
    148         self.tokenizer = XmlTokenizer(fp)
    149         self.consume()
    150     
    151     def consume(self):
    152         self.token = self.tokenizer.next()
    153 
    154     def match_element_start(self, name):
    155         return self.token.type == ELEMENT_START and self.token.name_or_data == name
    156     
    157     def match_element_end(self, name):
    158         return self.token.type == ELEMENT_END and self.token.name_or_data == name
    159 
    160     def element_start(self, name):
    161         while self.token.type == CHARACTER_DATA:
    162             self.consume()
    163         if self.token.type != ELEMENT_START:
    164             raise TokenMismatch(XmlToken(ELEMENT_START, name), self.token)
    165         if self.token.name_or_data != name:
    166             raise TokenMismatch(XmlToken(ELEMENT_START, name), self.token)
    167         attrs = self.token.attrs
    168         self.consume()
    169         return attrs
    170     
    171     def element_end(self, name):
    172         while self.token.type == CHARACTER_DATA:
    173             self.consume()
    174         if self.token.type != ELEMENT_END:
    175             raise TokenMismatch(XmlToken(ELEMENT_END, name), self.token)
    176         if self.token.name_or_data != name:
    177             raise TokenMismatch(XmlToken(ELEMENT_END, name), self.token)
    178         self.consume()
    179 
    180     def character_data(self, strip = True):
    181         data = ''
    182         while self.token.type == CHARACTER_DATA:
    183             data += self.token.name_or_data
    184             self.consume()
    185         if strip:
    186             data = data.strip()
    187         return data
    188 
    189 
    190 class TraceParser(XmlParser):
    191 
    192     def __init__(self, fp):
    193         XmlParser.__init__(self, fp)
    194         self.last_call_no = 0
    195     
    196     def parse(self):
    197         self.element_start('trace')
    198         while self.token.type not in (ELEMENT_END, EOF):
    199             call = self.parse_call()
    200             self.handle_call(call)
    201         if self.token.type != EOF:
    202             self.element_end('trace')
    203 
    204     def parse_call(self):
    205         attrs = self.element_start('call')
    206         try:
    207             no = int(attrs['no'])
    208         except KeyError:
    209             self.last_call_no += 1
    210             no = self.last_call_no
    211         else:
    212             self.last_call_no = no
    213         klass = attrs['class']
    214         method = attrs['method']
    215         args = []
    216         ret = None
    217         time = None
    218         while self.token.type == ELEMENT_START:
    219             if self.token.name_or_data == 'arg':
    220                 arg = self.parse_arg()
    221                 args.append(arg)
    222             elif self.token.name_or_data == 'ret':
    223                 ret = self.parse_ret()
    224             elif self.token.name_or_data == 'call':
    225                 # ignore nested function calls
    226                 self.parse_call()
    227             elif self.token.name_or_data == 'time':
    228                 time = self.parse_time()
    229             else:
    230                 raise TokenMismatch("<arg ...> or <ret ...>", self.token)
    231         self.element_end('call')
    232         
    233         return Call(no, klass, method, args, ret, time)
    234 
    235     def parse_arg(self):
    236         attrs = self.element_start('arg')
    237         name = attrs['name']
    238         value = self.parse_value()
    239         self.element_end('arg')
    240 
    241         return name, value
    242 
    243     def parse_ret(self):
    244         attrs = self.element_start('ret')
    245         value = self.parse_value()
    246         self.element_end('ret')
    247 
    248         return value
    249 
    250     def parse_time(self):
    251         attrs = self.element_start('time')
    252         time = self.parse_value();
    253         self.element_end('time')
    254         return time
    255 
    256     def parse_value(self):
    257         expected_tokens = ('null', 'bool', 'int', 'uint', 'float', 'string', 'enum', 'array', 'struct', 'ptr', 'bytes')
    258         if self.token.type == ELEMENT_START:
    259             if self.token.name_or_data in expected_tokens:
    260                 method = getattr(self, 'parse_' +  self.token.name_or_data)
    261                 return method()
    262         raise TokenMismatch(" or " .join(expected_tokens), self.token)
    263 
    264     def parse_null(self):
    265         self.element_start('null')
    266         self.element_end('null')
    267         return Literal(None)
    268         
    269     def parse_bool(self):
    270         self.element_start('bool')
    271         value = int(self.character_data())
    272         self.element_end('bool')
    273         return Literal(value)
    274         
    275     def parse_int(self):
    276         self.element_start('int')
    277         value = int(self.character_data())
    278         self.element_end('int')
    279         return Literal(value)
    280         
    281     def parse_uint(self):
    282         self.element_start('uint')
    283         value = int(self.character_data())
    284         self.element_end('uint')
    285         return Literal(value)
    286         
    287     def parse_float(self):
    288         self.element_start('float')
    289         value = float(self.character_data())
    290         self.element_end('float')
    291         return Literal(value)
    292         
    293     def parse_enum(self):
    294         self.element_start('enum')
    295         name = self.character_data()
    296         self.element_end('enum')
    297         return NamedConstant(name)
    298         
    299     def parse_string(self):
    300         self.element_start('string')
    301         value = self.character_data()
    302         self.element_end('string')
    303         return Literal(value)
    304         
    305     def parse_bytes(self):
    306         self.element_start('bytes')
    307         value = self.character_data()
    308         self.element_end('bytes')
    309         return Blob(value)
    310         
    311     def parse_array(self):
    312         self.element_start('array')
    313         elems = []
    314         while self.token.type != ELEMENT_END:
    315             elems.append(self.parse_elem())
    316         self.element_end('array')
    317         return Array(elems)
    318 
    319     def parse_elem(self):
    320         self.element_start('elem')
    321         value = self.parse_value()
    322         self.element_end('elem')
    323         return value
    324 
    325     def parse_struct(self):
    326         attrs = self.element_start('struct')
    327         name = attrs['name']
    328         members = []
    329         while self.token.type != ELEMENT_END:
    330             members.append(self.parse_member())
    331         self.element_end('struct')
    332         return Struct(name, members)
    333 
    334     def parse_member(self):
    335         attrs = self.element_start('member')
    336         name = attrs['name']
    337         value = self.parse_value()
    338         self.element_end('member')
    339 
    340         return name, value
    341 
    342     def parse_ptr(self):
    343         self.element_start('ptr')
    344         address = self.character_data()
    345         self.element_end('ptr')
    346 
    347         return Pointer(address)
    348 
    349     def handle_call(self, call):
    350         pass
    351     
    352     
    353 class TraceDumper(TraceParser):
    354     
    355     def __init__(self, fp, outStream = sys.stdout):
    356         TraceParser.__init__(self, fp)
    357         self.formatter = format.DefaultFormatter(outStream)
    358         self.pretty_printer = PrettyPrinter(self.formatter)
    359 
    360     def handle_call(self, call):
    361         call.visit(self.pretty_printer)
    362         self.formatter.newline()
    363         
    364 
    365 class Main:
    366     '''Common main class for all retrace command line utilities.''' 
    367 
    368     def __init__(self):
    369         pass
    370 
    371     def main(self):
    372         optparser = self.get_optparser()
    373         (options, args) = optparser.parse_args(sys.argv[1:])
    374     
    375         if not args:
    376             optparser.error('insufficient number of arguments')
    377 
    378         for arg in args:
    379             if arg.endswith('.gz'):
    380                 from gzip import GzipFile
    381                 stream = GzipFile(arg, 'rt')
    382             elif arg.endswith('.bz2'):
    383                 from bz2 import BZ2File
    384                 stream = BZ2File(arg, 'rU')
    385             else:
    386                 stream = open(arg, 'rt')
    387             self.process_arg(stream, options)
    388 
    389     def get_optparser(self):
    390         optparser = optparse.OptionParser(
    391             usage="\n\t%prog [options] TRACE  [...]")
    392         return optparser
    393 
    394     def process_arg(self, stream, options):
    395         parser = TraceDumper(stream)
    396         parser.parse()
    397 
    398 
    399 if __name__ == '__main__':
    400     Main().main()
    401