1 #!/usr/bin/env python 2 # 3 # Copyright (C) 2016 The Android Open Source Project 4 # 5 # Licensed under the Apache License, Version 2.0 (the "License"); 6 # you may not use this file except in compliance with the License. 7 # You may obtain a copy of the License at 8 # 9 # http://www.apache.org/licenses/LICENSE-2.0 10 # 11 # Unless required by applicable law or agreed to in writing, software 12 # distributed under the License is distributed on an "AS IS" BASIS, 13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 # See the License for the specific language governing permissions and 15 # limitations under the License. 16 # 17 """Analyze block trace""" 18 19 import collections 20 import os 21 import re 22 import string 23 import sys 24 25 # ex) <...>-52 [001] ...1 1.362574: block_bio_queue: 8,16 R 0 + 8 [kworker/u8:1] 26 RE_BLOCK = r'.+-([0-9]+).*\s+([0-9]+\.[0-9]+):\s+block_bio_queue:\s+([0-9]+)\,([0-9]+)\s(\S+)\s+([0-9]+)\s+\+\s+([0-9]+)\s+\[([^\]]+)' 27 # ex) <...>-453 [001] d..4 3.181854: sched_blocked_reason: pid=471 iowait=1 caller=__wait_on_buffer+0x24/0x2c 28 RE_SCHED_BLOCKED_READSON = r'.+-([0-9]+)\s+\[([0-9]+)\]\s.*\s+([0-9]+\.[0-9]+):\s+sched_blocked_reason:\spid=([0-9]+)\siowait=([01])\scaller=(\S+)' 29 # ex) <idle>-0 [000] d..3 3.181864: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=ueventd next_pid=471 next_prio=120 30 RE_SCHED_SWITCH = r'.+-([0-9]+)\s+\[([0-9]+)\]\s.*\s+([0-9]+\.[0-9]+):\s+sched_switch:\sprev_comm=(.+)\sprev_pid=([0-9]+)\sprev_prio=([0-9]+)\sprev_state=(\S+).*next_comm=(.+)\snext_pid=([0-9]+)\snext_prio=([0-9]+)' 31 32 # dev_num = major * MULTIPLIER + minor 33 DEV_MAJOR_MULTIPLIER = 1000 34 35 # dm access is remapped to disk access. So account differently 36 DM_MAJOR = 253 37 38 MAX_PROCESS_DUMP = 10 39 40 class RwEvent: 41 def __init__(self, block_num, start_time, size): 42 self.block_num = block_num 43 self.start_time = start_time 44 self.size = size 45 46 def get_string_pos(strings, string_to_find): 47 for i, s in enumerate(strings): 48 if s == string_to_find: 49 return i 50 return -1 51 52 class ProcessData: 53 def __init__(self, name): 54 self.name = name 55 self.reads = {} # k : dev_num, v : [] of reads 56 self.per_device_total_reads = {} 57 self.writes = {} 58 self.per_device_total_writes = {} 59 self.total_reads = 0 60 self.total_writes = 0 61 self.total_dm_reads = 0 62 self.total_dm_writes = 0 63 64 65 def add_read_event(self, major, minor, event): 66 devNum = major * DEV_MAJOR_MULTIPLIER + minor; 67 events = self.reads.get(devNum) 68 if not events: 69 events = [] 70 self.reads[devNum] = events 71 self.per_device_total_reads[devNum] = 0 72 events.append(event) 73 self.total_reads += event.size 74 self.per_device_total_reads[devNum] += event.size 75 76 def add_write_event(self, major, minor, event): 77 devNum = major * DEV_MAJOR_MULTIPLIER + minor; 78 events = self.writes.get(devNum) 79 if not events: 80 events = [] 81 self.writes[devNum] = events 82 self.per_device_total_writes[devNum] = 0 83 events.append(event) 84 self.total_writes += event.size 85 self.per_device_total_writes[devNum] += event.size 86 87 def add_dm_read(self, size): 88 self.total_dm_reads += size 89 90 def add_dm_write(self, size): 91 self.total_dm_writes += size 92 93 def dump(self): 94 print "Process,", self.name 95 print " total reads,", self.total_reads 96 print " total writes,", self.total_writes 97 print " total dm reads,", self.total_dm_reads 98 print " total dm writes,", self.total_dm_writes 99 print " R per device" 100 sorted_r = collections.OrderedDict(sorted(self.per_device_total_reads.items(), \ 101 key = lambda item: item[1], reverse = True)) 102 for i in range(len(sorted_r)): 103 dev = sorted_r.popitem(last=False) 104 print " ", dev[0],dev[1] 105 106 print " W per device" 107 sorted_w = collections.OrderedDict(sorted(self.per_device_total_writes.items(), \ 108 key = lambda item: item[1], reverse = True)) 109 for i in range(len(sorted_w)): 110 dev = sorted_w.popitem(last=False) 111 print " ", dev[0],dev[1] 112 113 class IoTrace: 114 115 def __init__(self): 116 self.ios = {} #K: process name, v:ProcessData 117 self.total_reads = 0 118 self.total_writes = 0 119 self.total_reads_per_device = {} #K: block num, V: total blocks 120 self.total_writes_per_device = {} 121 self.total_dm_reads = {} #K: devnum, V: blocks 122 self.total_dm_writes = {} 123 self.re_block = re.compile(RE_BLOCK) 124 125 def parse(self, l): 126 match = self.re_block.match(l) 127 if not match: 128 return False 129 try: 130 self.do_parse_bio_queue(l, match) 131 except ValueError: 132 print "cannot parse:", l 133 raise 134 return True 135 136 def do_parse_bio_queue(self, l, match): 137 pid = match.group(1) 138 start_time = float(match.group(2))*1000 #ms 139 major = int(match.group(3)) 140 minor = int(match.group(4)) 141 devNum = major * DEV_MAJOR_MULTIPLIER + minor; 142 operation = match.group(5) 143 block_num = int(match.group(6)) 144 size = int(match.group(7)) 145 process = match.group(8) + "-" + pid 146 event = RwEvent(block_num, start_time, size) 147 io = self.ios.get(process) 148 if not io: 149 io = ProcessData(process) 150 self.ios[process] = io 151 if major == DM_MAJOR: 152 devNum = major * DEV_MAJOR_MULTIPLIER + minor; 153 if 'R' in operation[0]: 154 if devNum not in self.total_dm_reads: 155 self.total_dm_reads[devNum] = 0 156 self.total_dm_reads[devNum] += size 157 io.add_dm_read(size) 158 elif 'W' in operation[0]: 159 if devNum not in self.total_dm_writes: 160 self.total_dm_writes[devNum] = 0 161 self.total_dm_writes[devNum] += size 162 io.add_dm_write(size) 163 return 164 if 'R' in operation[0]: 165 io.add_read_event(major, minor, event) 166 self.total_reads += size 167 per_device = self.total_reads_per_device.get(devNum) 168 if not per_device: 169 self.total_reads_per_device[devNum] = 0 170 self.total_reads_per_device[devNum] += size 171 elif 'W' in operation[0]: 172 io.add_write_event(major, minor, event) 173 self.total_writes += size 174 per_device = self.total_writes_per_device.get(devNum) 175 if not per_device: 176 self.total_writes_per_device[devNum] = 0 177 self.total_writes_per_device[devNum] += size 178 179 def dump(self): 180 print "total read blocks,", self.total_reads 181 print "total write blocks,", self.total_writes 182 print "Total DM R" 183 for dev,size in self.total_dm_reads.items(): 184 print dev, size 185 print "Total DM W" 186 for dev,size in self.total_dm_writes.items(): 187 print dev, size 188 print "**Process total R/W" 189 sorted_by_total_rw = collections.OrderedDict(sorted(self.ios.items(), \ 190 key = lambda item: item[1].total_reads + item[1].total_writes, reverse = True)) 191 for i in range(MAX_PROCESS_DUMP): 192 process = sorted_by_total_rw.popitem(last=False) 193 if not process: 194 break 195 process[1].dump() 196 197 print "**Process total W" 198 sorted_by_total_w = collections.OrderedDict(sorted(self.ios.items(), \ 199 key = lambda item: item[1].total_writes, reverse = True)) 200 for i in range(5): 201 process = sorted_by_total_w.popitem(last=False) 202 if not process: 203 break 204 process[1].dump() 205 206 print "**Device total R" 207 sorted_by_total_r = collections.OrderedDict(sorted(self.total_reads_per_device.items(), \ 208 key = lambda item: item[1], reverse = True)) 209 for i in range(len(sorted_by_total_r)): 210 dev = sorted_by_total_r.popitem(last=False) 211 print dev[0],dev[1] 212 213 print "**Device total W" 214 sorted_by_total_w = collections.OrderedDict(sorted(self.total_writes_per_device.items(), \ 215 key = lambda item: item[1], reverse = True)) 216 for i in range(len(sorted_by_total_w)): 217 dev = sorted_by_total_w.popitem(last=False) 218 print dev[0],dev[1] 219 220 class SchedProcess: 221 def __init__(self, pid): 222 self.pid = pid 223 self.name = "unknown" 224 self.total_execution_time = 0.0 225 self.total_io_wait_time = 0.0 226 self.total_other_wait_time = 0.0 227 self.waiting_calls = {} # k: waiting_call, v : waiting counter 228 self.io_waiting_call_times = {} # k: waiting_call, v: total wait time 229 self.in_iowait = False 230 self.last_waiting_call = None 231 self.last_switch_out_time = 0.0 232 self.last_switch_in_time = 0.0 233 self.last_core = -1 234 self.execution_time_per_core = {} # k: core, v : time 235 self.io_latency_histograms = {} # k : delay in ms, v : count 236 237 def handle_reason(self, current_time, iowait, waiting_call): 238 #if self.pid == 1232: 239 # print current_time, iowait, waiting_call 240 if iowait == 1: 241 self.in_iowait = True 242 self.last_waiting_call = waiting_call 243 call_counter = self.waiting_calls.get(waiting_call, 0) 244 call_counter += 1 245 self.waiting_calls[waiting_call] = call_counter 246 247 def handle_switch_out(self, current_time, out_state, priority, name, core): 248 #if self.pid == 1232: 249 # print "out", current_time, out_state 250 if self.name != name: 251 self.name = name 252 self.last_switch_out_time = current_time 253 if self.last_switch_in_time == 0.0: # switch in not recorded. ignore this one 254 return 255 execution_time = current_time - self.last_switch_in_time 256 self.total_execution_time += execution_time 257 core_execution_time = self.execution_time_per_core.get(core, 0.0) 258 core_execution_time += execution_time 259 self.execution_time_per_core[core] = core_execution_time 260 261 def handle_switch_in(self, current_time, priority, name, core): 262 #if self.pid == 1232: 263 # print "in", current_time, self.in_iowait 264 if self.name != name: 265 self.name = name 266 self.last_switch_in_time = current_time 267 if self.last_switch_out_time == 0.0: # in without out, probably 1st 268 self.in_iowait = False 269 return 270 wait_time = current_time - self.last_switch_out_time 271 if self.in_iowait: 272 self.total_io_wait_time += wait_time 273 total_waiting_call_time = self.io_waiting_call_times.get(self.last_waiting_call, 0.0) 274 total_waiting_call_time += wait_time 275 self.io_waiting_call_times[self.last_waiting_call] = total_waiting_call_time 276 wait_time_ms = int(wait_time*10) / 10.0 # resolution up to 0.1 ms 277 histogram_count = self.io_latency_histograms.get(wait_time_ms, 0) 278 histogram_count += 1 279 self.io_latency_histograms[wait_time_ms] = histogram_count 280 else: 281 self.total_other_wait_time += wait_time 282 self.in_iowait = False 283 284 285 def dump(self): 286 print "PID:", self.pid, " name:", self.name 287 print " total execution time:", self.total_execution_time,\ 288 " io wait:", self.total_io_wait_time, " other wait:", self.total_other_wait_time 289 sorted_data = collections.OrderedDict(sorted(self.execution_time_per_core.items(), \ 290 key = lambda item: item[0], reverse = False)) 291 print " Core execution:", sorted_data 292 sorted_data = collections.OrderedDict(sorted(self.waiting_calls.items(), \ 293 key = lambda item: item[1], reverse = True)) 294 print " Wait calls:", sorted_data 295 sorted_data = collections.OrderedDict(sorted(self.io_waiting_call_times.items(), \ 296 key = lambda item: item[1], reverse = True)) 297 print " IO Wait time per wait calls:", sorted_data 298 sorted_data = collections.OrderedDict(sorted(self.io_latency_histograms.items(), \ 299 key = lambda item: item[0], reverse = False)) 300 print " Wait time histogram:", sorted_data 301 302 class SchedTrace: 303 def __init__(self): 304 self.re_switch = re.compile(RE_SCHED_SWITCH) 305 self.re_reason = re.compile(RE_SCHED_BLOCKED_READSON) 306 self.processes = {} # key: pid, v : SchedProcess 307 308 def parse(self, l): 309 checked_reason = False 310 match = self.re_switch.match(l) 311 if not match: 312 match = self.re_reason.match(l) 313 checked_reason = True 314 if not match: 315 return False 316 try: 317 if checked_reason: 318 self.do_handle_reason(l, match) 319 else: 320 self.do_handle_switch(l, match) 321 except ValueError: 322 print "cannot parse:", l 323 raise 324 return True 325 326 def do_handle_switch(self, l, match): 327 current_pid = int(match.group(1)) 328 cpu_core = int(match.group(2)) 329 current_time = float(match.group(3))*1000 #ms 330 out_name = match.group(4) 331 out_pid = int(match.group(5)) 332 out_prio = int(match.group(6)) 333 out_state = match.group(7) 334 in_name = match.group(8) 335 in_pid = int(match.group(9)) 336 in_prio = int(match.group(10)) 337 out_process = self.processes.get(out_pid) 338 if not out_process: 339 out_process = SchedProcess(out_pid) 340 self.processes[out_pid] = out_process 341 in_process = self.processes.get(in_pid) 342 if not in_process: 343 in_process = SchedProcess(in_pid) 344 self.processes[in_pid] = in_process 345 out_process.handle_switch_out(current_time, out_state, out_prio, out_name, cpu_core) 346 in_process.handle_switch_in(current_time, in_prio, in_name, cpu_core) 347 348 def do_handle_reason(self, l, match): 349 current_pid = int(match.group(1)) 350 cpu_core = int(match.group(2)) 351 current_time = float(match.group(3))*1000 #ms 352 pid = int(match.group(4)) 353 iowait = int(match.group(5)) 354 waiting_call = match.group(6) 355 process = self.processes.get(pid) 356 if not process: 357 process = SchedProcess(pid) 358 self.processes[pid] = process 359 process.handle_reason(current_time, iowait, waiting_call) 360 361 def dump(self): 362 sorted_by_total_execution = collections.OrderedDict(sorted(self.processes.items(), \ 363 key = lambda item: item[1].total_io_wait_time, reverse = True)) 364 for k, v in sorted_by_total_execution.iteritems(): 365 if v.total_execution_time > 10.0 or v.total_io_wait_time != 0.0: 366 v.dump() 367 368 def main(argv): 369 if (len(argv) < 2): 370 print "check_io_trace_all.py filename" 371 return 372 filename = argv[1] 373 374 io_trace = IoTrace() 375 sched_trace = SchedTrace() 376 with open(filename) as f: 377 for l in f: 378 if io_trace.parse(l): 379 continue 380 sched_trace.parse(l) 381 io_trace.dump() 382 print "\n\n\n" 383 sched_trace.dump() 384 385 if __name__ == '__main__': 386 main(sys.argv) 387