Home | History | Annotate | Download | only in io_analysis
      1 #!/usr/bin/env python
      2 #
      3 # Copyright (C) 2016 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 #
     17 """Analyze block trace"""
     18 
     19 import collections
     20 import os
     21 import re
     22 import string
     23 import sys
     24 
     25 # ex) <...>-52    [001] ...1     1.362574: block_bio_queue: 8,16 R 0 + 8 [kworker/u8:1]
     26 RE_BLOCK = r'.+-([0-9]+).*\s+([0-9]+\.[0-9]+):\s+block_bio_queue:\s+([0-9]+)\,([0-9]+)\s(\S+)\s+([0-9]+)\s+\+\s+([0-9]+)\s+\[([^\]]+)'
     27 # ex)  <...>-453   [001] d..4     3.181854: sched_blocked_reason: pid=471 iowait=1 caller=__wait_on_buffer+0x24/0x2c
     28 RE_SCHED_BLOCKED_READSON = r'.+-([0-9]+)\s+\[([0-9]+)\]\s.*\s+([0-9]+\.[0-9]+):\s+sched_blocked_reason:\spid=([0-9]+)\siowait=([01])\scaller=(\S+)'
     29 # ex) <idle>-0     [000] d..3     3.181864: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=ueventd next_pid=471 next_prio=120
     30 RE_SCHED_SWITCH = r'.+-([0-9]+)\s+\[([0-9]+)\]\s.*\s+([0-9]+\.[0-9]+):\s+sched_switch:\sprev_comm=(.+)\sprev_pid=([0-9]+)\sprev_prio=([0-9]+)\sprev_state=(\S+).*next_comm=(.+)\snext_pid=([0-9]+)\snext_prio=([0-9]+)'
     31 
     32 # dev_num = major * MULTIPLIER + minor
     33 DEV_MAJOR_MULTIPLIER = 1000
     34 
     35 # dm access is remapped to disk access. So account differently
     36 DM_MAJOR = 253
     37 
     38 MAX_PROCESS_DUMP = 10
     39 
     40 class RwEvent:
     41   def __init__(self, block_num, start_time, size):
     42     self.block_num = block_num
     43     self.start_time = start_time
     44     self.size = size
     45 
     46 def get_string_pos(strings, string_to_find):
     47   for i, s in enumerate(strings):
     48     if s == string_to_find:
     49       return i
     50   return -1
     51 
     52 class ProcessData:
     53   def __init__(self, name):
     54     self.name = name
     55     self.reads = {} # k : dev_num, v : [] of reads
     56     self.per_device_total_reads = {}
     57     self.writes = {}
     58     self.per_device_total_writes = {}
     59     self.total_reads = 0
     60     self.total_writes = 0
     61     self.total_dm_reads = 0
     62     self.total_dm_writes = 0
     63 
     64 
     65   def add_read_event(self, major, minor, event):
     66     devNum = major * DEV_MAJOR_MULTIPLIER + minor;
     67     events = self.reads.get(devNum)
     68     if not events:
     69       events = []
     70       self.reads[devNum] = events
     71       self.per_device_total_reads[devNum] = 0
     72     events.append(event)
     73     self.total_reads += event.size
     74     self.per_device_total_reads[devNum] += event.size
     75 
     76   def add_write_event(self, major, minor, event):
     77     devNum = major * DEV_MAJOR_MULTIPLIER + minor;
     78     events = self.writes.get(devNum)
     79     if not events:
     80       events = []
     81       self.writes[devNum] = events
     82       self.per_device_total_writes[devNum] = 0
     83     events.append(event)
     84     self.total_writes += event.size
     85     self.per_device_total_writes[devNum] += event.size
     86 
     87   def add_dm_read(self, size):
     88     self.total_dm_reads += size
     89 
     90   def add_dm_write(self, size):
     91     self.total_dm_writes += size
     92 
     93   def dump(self):
     94     print "Process,", self.name
     95     print " total reads,", self.total_reads
     96     print " total writes,", self.total_writes
     97     print " total dm reads,", self.total_dm_reads
     98     print " total dm writes,", self.total_dm_writes
     99     print " R per device"
    100     sorted_r = collections.OrderedDict(sorted(self.per_device_total_reads.items(), \
    101       key = lambda item: item[1], reverse = True))
    102     for i in range(len(sorted_r)):
    103       dev = sorted_r.popitem(last=False)
    104       print " ", dev[0],dev[1]
    105 
    106     print " W per device"
    107     sorted_w = collections.OrderedDict(sorted(self.per_device_total_writes.items(), \
    108       key = lambda item: item[1], reverse = True))
    109     for i in range(len(sorted_w)):
    110       dev = sorted_w.popitem(last=False)
    111       print " ", dev[0],dev[1]
    112 
    113 class IoTrace:
    114 
    115   def __init__(self):
    116     self.ios = {} #K: process name, v:ProcessData
    117     self.total_reads = 0
    118     self.total_writes = 0
    119     self.total_reads_per_device = {} #K: block num, V: total blocks
    120     self.total_writes_per_device = {}
    121     self.total_dm_reads = {} #K: devnum, V: blocks
    122     self.total_dm_writes = {}
    123     self.re_block = re.compile(RE_BLOCK)
    124 
    125   def parse(self, l):
    126     match = self.re_block.match(l)
    127     if not match:
    128       return False
    129     try:
    130       self.do_parse_bio_queue(l, match)
    131     except ValueError:
    132       print "cannot parse:", l
    133       raise
    134     return True
    135 
    136   def do_parse_bio_queue(self, l, match):
    137     pid = match.group(1)
    138     start_time = float(match.group(2))*1000 #ms
    139     major = int(match.group(3))
    140     minor =  int(match.group(4))
    141     devNum = major * DEV_MAJOR_MULTIPLIER + minor;
    142     operation =  match.group(5)
    143     block_num = int(match.group(6))
    144     size = int(match.group(7))
    145     process = match.group(8) + "-" + pid
    146     event = RwEvent(block_num, start_time, size)
    147     io = self.ios.get(process)
    148     if not io:
    149       io = ProcessData(process)
    150       self.ios[process] = io
    151     if major == DM_MAJOR:
    152       devNum = major * DEV_MAJOR_MULTIPLIER + minor;
    153       if 'R' in operation[0]:
    154         if devNum not in self.total_dm_reads:
    155           self.total_dm_reads[devNum] = 0
    156         self.total_dm_reads[devNum] += size
    157         io.add_dm_read(size)
    158       elif 'W' in operation[0]:
    159         if devNum not in self.total_dm_writes:
    160           self.total_dm_writes[devNum] = 0
    161         self.total_dm_writes[devNum] += size
    162         io.add_dm_write(size)
    163       return
    164     if 'R' in operation[0]:
    165       io.add_read_event(major, minor, event)
    166       self.total_reads += size
    167       per_device = self.total_reads_per_device.get(devNum)
    168       if not per_device:
    169         self.total_reads_per_device[devNum] = 0
    170       self.total_reads_per_device[devNum] += size
    171     elif 'W' in operation[0]:
    172       io.add_write_event(major, minor, event)
    173       self.total_writes += size
    174       per_device = self.total_writes_per_device.get(devNum)
    175       if not per_device:
    176         self.total_writes_per_device[devNum] = 0
    177       self.total_writes_per_device[devNum] += size
    178 
    179   def dump(self):
    180     print "total read blocks,", self.total_reads
    181     print "total write blocks,", self.total_writes
    182     print "Total DM R"
    183     for dev,size in self.total_dm_reads.items():
    184       print dev, size
    185     print "Total DM W"
    186     for dev,size in self.total_dm_writes.items():
    187       print dev, size
    188     print "**Process total R/W"
    189     sorted_by_total_rw = collections.OrderedDict(sorted(self.ios.items(), \
    190       key = lambda item: item[1].total_reads + item[1].total_writes, reverse = True))
    191     for i in range(MAX_PROCESS_DUMP):
    192       process = sorted_by_total_rw.popitem(last=False)
    193       if not process:
    194         break
    195       process[1].dump()
    196 
    197     print "**Process total W"
    198     sorted_by_total_w = collections.OrderedDict(sorted(self.ios.items(), \
    199       key = lambda item: item[1].total_writes, reverse = True))
    200     for i in range(5):
    201       process = sorted_by_total_w.popitem(last=False)
    202       if not process:
    203         break
    204       process[1].dump()
    205 
    206     print "**Device total R"
    207     sorted_by_total_r = collections.OrderedDict(sorted(self.total_reads_per_device.items(), \
    208       key = lambda item: item[1], reverse = True))
    209     for i in range(len(sorted_by_total_r)):
    210       dev = sorted_by_total_r.popitem(last=False)
    211       print dev[0],dev[1]
    212 
    213     print "**Device total W"
    214     sorted_by_total_w = collections.OrderedDict(sorted(self.total_writes_per_device.items(), \
    215       key = lambda item: item[1], reverse = True))
    216     for i in range(len(sorted_by_total_w)):
    217       dev = sorted_by_total_w.popitem(last=False)
    218       print dev[0],dev[1]
    219 
    220 class SchedProcess:
    221   def __init__(self, pid):
    222     self.pid = pid
    223     self.name = "unknown"
    224     self.total_execution_time = 0.0
    225     self.total_io_wait_time = 0.0
    226     self.total_other_wait_time = 0.0
    227     self.waiting_calls = {} # k: waiting_call, v : waiting counter
    228     self.io_waiting_call_times = {} # k: waiting_call, v: total wait time
    229     self.in_iowait = False
    230     self.last_waiting_call = None
    231     self.last_switch_out_time = 0.0
    232     self.last_switch_in_time = 0.0
    233     self.last_core = -1
    234     self.execution_time_per_core = {} # k: core, v : time
    235     self.io_latency_histograms = {} # k : delay in ms, v : count
    236 
    237   def handle_reason(self, current_time, iowait, waiting_call):
    238     #if self.pid == 1232:
    239     #  print current_time, iowait, waiting_call
    240     if iowait == 1:
    241       self.in_iowait = True
    242     self.last_waiting_call = waiting_call
    243     call_counter = self.waiting_calls.get(waiting_call, 0)
    244     call_counter += 1
    245     self.waiting_calls[waiting_call] = call_counter
    246 
    247   def handle_switch_out(self, current_time, out_state, priority, name, core):
    248     #if self.pid == 1232:
    249     #  print "out", current_time, out_state
    250     if self.name != name:
    251       self.name = name
    252     self.last_switch_out_time = current_time
    253     if self.last_switch_in_time == 0.0: # switch in not recorded. ignore this one
    254       return
    255     execution_time = current_time - self.last_switch_in_time
    256     self.total_execution_time += execution_time
    257     core_execution_time = self.execution_time_per_core.get(core, 0.0)
    258     core_execution_time += execution_time
    259     self.execution_time_per_core[core] = core_execution_time
    260 
    261   def handle_switch_in(self, current_time, priority, name, core):
    262     #if self.pid == 1232:
    263     #  print "in", current_time, self.in_iowait
    264     if self.name != name:
    265       self.name = name
    266     self.last_switch_in_time = current_time
    267     if self.last_switch_out_time == 0.0: # in without out, probably 1st
    268       self.in_iowait = False
    269       return
    270     wait_time = current_time - self.last_switch_out_time
    271     if self.in_iowait:
    272       self.total_io_wait_time += wait_time
    273       total_waiting_call_time = self.io_waiting_call_times.get(self.last_waiting_call, 0.0)
    274       total_waiting_call_time += wait_time
    275       self.io_waiting_call_times[self.last_waiting_call] = total_waiting_call_time
    276       wait_time_ms = int(wait_time*10) / 10.0 # resolution up to 0.1 ms
    277       histogram_count = self.io_latency_histograms.get(wait_time_ms, 0)
    278       histogram_count += 1
    279       self.io_latency_histograms[wait_time_ms] = histogram_count
    280     else:
    281       self.total_other_wait_time += wait_time
    282     self.in_iowait = False
    283 
    284 
    285   def dump(self):
    286     print "PID:", self.pid, " name:", self.name
    287     print " total execution time:", self.total_execution_time,\
    288       " io wait:", self.total_io_wait_time, " other wait:", self.total_other_wait_time
    289     sorted_data = collections.OrderedDict(sorted(self.execution_time_per_core.items(), \
    290       key = lambda item: item[0], reverse = False))
    291     print " Core execution:", sorted_data
    292     sorted_data = collections.OrderedDict(sorted(self.waiting_calls.items(), \
    293       key = lambda item: item[1], reverse = True))
    294     print " Wait calls:", sorted_data
    295     sorted_data = collections.OrderedDict(sorted(self.io_waiting_call_times.items(), \
    296       key = lambda item: item[1], reverse = True))
    297     print " IO Wait time per wait calls:", sorted_data
    298     sorted_data = collections.OrderedDict(sorted(self.io_latency_histograms.items(), \
    299       key = lambda item: item[0], reverse = False))
    300     print " Wait time histogram:", sorted_data
    301 
    302 class SchedTrace:
    303   def __init__(self):
    304     self.re_switch = re.compile(RE_SCHED_SWITCH)
    305     self.re_reason = re.compile(RE_SCHED_BLOCKED_READSON)
    306     self.processes = {} # key: pid, v : SchedProcess
    307 
    308   def parse(self, l):
    309     checked_reason = False
    310     match = self.re_switch.match(l)
    311     if not match:
    312       match = self.re_reason.match(l)
    313       checked_reason = True
    314     if not match:
    315       return False
    316     try:
    317       if checked_reason:
    318         self.do_handle_reason(l, match)
    319       else:
    320         self.do_handle_switch(l, match)
    321     except ValueError:
    322       print "cannot parse:", l
    323       raise
    324     return True
    325 
    326   def do_handle_switch(self, l, match):
    327     current_pid = int(match.group(1))
    328     cpu_core = int(match.group(2))
    329     current_time = float(match.group(3))*1000 #ms
    330     out_name = match.group(4)
    331     out_pid = int(match.group(5))
    332     out_prio = int(match.group(6))
    333     out_state = match.group(7)
    334     in_name = match.group(8)
    335     in_pid = int(match.group(9))
    336     in_prio = int(match.group(10))
    337     out_process = self.processes.get(out_pid)
    338     if not out_process:
    339       out_process = SchedProcess(out_pid)
    340       self.processes[out_pid] = out_process
    341     in_process = self.processes.get(in_pid)
    342     if not in_process:
    343       in_process = SchedProcess(in_pid)
    344       self.processes[in_pid] = in_process
    345     out_process.handle_switch_out(current_time, out_state, out_prio, out_name, cpu_core)
    346     in_process.handle_switch_in(current_time, in_prio, in_name, cpu_core)
    347 
    348   def do_handle_reason(self, l, match):
    349     current_pid = int(match.group(1))
    350     cpu_core = int(match.group(2))
    351     current_time = float(match.group(3))*1000 #ms
    352     pid = int(match.group(4))
    353     iowait = int(match.group(5))
    354     waiting_call = match.group(6)
    355     process = self.processes.get(pid)
    356     if not process:
    357       process = SchedProcess(pid)
    358       self.processes[pid] = process
    359     process.handle_reason(current_time, iowait, waiting_call)
    360 
    361   def dump(self):
    362     sorted_by_total_execution = collections.OrderedDict(sorted(self.processes.items(), \
    363       key = lambda item: item[1].total_io_wait_time, reverse = True))
    364     for k, v in sorted_by_total_execution.iteritems():
    365       if v.total_execution_time > 10.0 or v.total_io_wait_time != 0.0:
    366         v.dump()
    367 
    368 def main(argv):
    369   if (len(argv) < 2):
    370     print "check_io_trace_all.py filename"
    371     return
    372   filename = argv[1]
    373 
    374   io_trace = IoTrace()
    375   sched_trace = SchedTrace()
    376   with open(filename) as f:
    377     for l in f:
    378       if io_trace.parse(l):
    379         continue
    380       sched_trace.parse(l)
    381   io_trace.dump()
    382   print "\n\n\n"
    383   sched_trace.dump()
    384 
    385 if __name__ == '__main__':
    386   main(sys.argv)
    387