Home | History | Annotate | Download | only in bench
      1 #!/usr/bin/env python
      2 
      3 # Copyright (C) 2015 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the 'License');
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an 'AS IS' BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 """
     18 Generates storage benchmark from captured strace output.
     19 
     20 Currently assumes that all mmap'ed regions are resource accesses, and emulates as pread().
     21 
     22 Usage:
     23 $ adb shell strace -p `pid zygote` -o /data/local/tmp/trace -f -ff -y -ttt -e trace=file,desc,munmap
     24 $ adb pull /data/local/tmp/trace*
     25 $ python benchgen.py trace.*
     26 
     27 """
     28 
     29 import re, sys, collections, traceback, argparse
     30 
     31 from operator import itemgetter
     32 from collections import defaultdict
     33 
     34 class Event:
     35     def __init__(self, thread, time, call, args, ret):
     36         self.thread = thread
     37         self.time = time
     38         self.call = call
     39         self.args = args
     40         self.ret = ret
     41 
     42     def __repr__(self):
     43         return "%s(%s)=%s" % (self.call, repr(self.args), self.ret)
     44 
     45 
     46 class File:
     47     def __init__(self, name, ident):
     48         self.name = name
     49         self.ident = ident
     50         self.size = 0
     51 
     52     def __repr__(self):
     53         return self.name
     54 
     55 
     56 events = []
     57 files = {}
     58 
     59 def find_file(name):
     60     name = name.strip('<>"')
     61     if name not in files:
     62         files[name] = File(name, len(files))
     63     return files[name]
     64 
     65 def extract_file(e, arg):
     66     if "<" in arg:
     67         fd, path = arg.split("<")
     68         path = path.strip(">")
     69         handle = "t%sf%s" % (e.thread, fd)
     70         return (fd, find_file(path), handle)
     71     else:
     72         return (None, None, None)
     73 
     74 def parse_args(s):
     75     args = []
     76     arg = ""
     77     esc = False
     78     quot = False
     79     for c in s:
     80         if esc:
     81             esc = False
     82             arg += c
     83             continue
     84 
     85         if c == '"':
     86             if quot:
     87                 quot = False
     88                 continue
     89             else:
     90                 quot = True
     91                 continue
     92 
     93         if c == '\\':
     94             esc = True
     95             continue
     96 
     97         if c == ',' and not quot:
     98             args.append(arg.strip())
     99             arg = ""
    100         else:
    101             arg += c
    102 
    103     args.append(arg.strip())
    104     return args
    105 
    106 
    107 bufsize = 1048576
    108 interesting = ["mmap2","read","write","pread64","pwrite64","fsync","fdatasync","openat","close","lseek","_llseek"]
    109 
    110 re_event = re.compile(r"^([\d\.]+) (.+?)\((.+?)\) = (.+?)$")
    111 re_arg = re.compile(r'''((?:[^,"']|"[^"]*"|'[^']*')+)''')
    112 for fn in sys.argv[1:]:
    113     with open(fn) as f:
    114         thread = int(fn.split(".")[-1])
    115         for line in f:
    116             line = re_event.match(line)
    117             if not line: continue
    118 
    119             time, call, args, ret = line.groups()
    120             if call not in interesting: continue
    121             if "/data/" not in args: continue
    122 
    123             time = float(time)
    124             args = parse_args(args)
    125             events.append(Event(thread, time, call, args, ret))
    126 
    127 
    128 with open("BenchmarkGen.h", 'w') as bench:
    129     print >>bench, """/*
    130  * Copyright (C) 2015 The Android Open Source Project
    131  *
    132  * Licensed under the Apache License, Version 2.0 (the "License");
    133  * you may not use this file except in compliance with the License.
    134  * You may obtain a copy of the License at
    135  *
    136  *      http://www.apache.org/licenses/LICENSE-2.0
    137  *
    138  * Unless required by applicable law or agreed to in writing, software
    139  * distributed under the License is distributed on an "AS IS" BASIS,
    140  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    141  * See the License for the specific language governing permissions and
    142  * limitations under the License.
    143  */
    144 
    145 
    146 /******************************************************************
    147  * THIS CODE WAS GENERATED BY benchgen.py, DO NOT MODIFY DIRECTLY *
    148  ******************************************************************/
    149 
    150 
    151 #include <android-base/logging.h>
    152 
    153 #include <stdlib.h>
    154 #include <sys/types.h>
    155 #include <sys/stat.h>
    156 #include <sys/sendfile.h>
    157 #include <fcntl.h>
    158 
    159 #include <algorithm>
    160 #include <string>
    161 
    162 #include <Utils.h>
    163 
    164 namespace android {
    165 namespace vold {
    166 
    167 static status_t BenchmarkRun() {
    168 """
    169 
    170     print >>bench, "char* buf = (char*) malloc(%d);" % (bufsize)
    171 
    172     nread = 0
    173     nwrite = 0
    174     nsync = 0
    175     events = sorted(events, key=lambda e: e.time)
    176     active = set()
    177     defined = set()
    178     for e in events:
    179         if e.call == "openat":
    180             fd, f, handle = extract_file(e, e.ret)
    181             if f:
    182                 active.add(handle)
    183                 if handle not in defined:
    184                     print >>bench, "int ",
    185                     defined.add(handle)
    186                 print >>bench, '%s = TEMP_FAILURE_RETRY(open("file%s", %s));' % (handle, f.ident, e.args[2])
    187 
    188         elif e.call == "close":
    189             fd, f, handle = extract_file(e, e.args[0])
    190             if handle in active:
    191                 active.remove(handle)
    192                 print >>bench, 'close(%s);' % (handle)
    193 
    194         elif e.call == "lseek":
    195             fd, f, handle = extract_file(e, e.args[0])
    196             if handle in active:
    197                 print >>bench, 'TEMP_FAILURE_RETRY(lseek(%s, %s, %s));' % (handle, e.args[1], e.args[2])
    198 
    199         elif e.call == "_llseek":
    200             fd, f, handle = extract_file(e, e.args[0])
    201             if handle in active:
    202                 print >>bench, 'TEMP_FAILURE_RETRY(lseek(%s, %s, %s));' % (handle, e.args[1], e.args[3])
    203 
    204         elif e.call == "read":
    205             fd, f, handle = extract_file(e, e.args[0])
    206             if handle in active:
    207                 # TODO: track actual file size instead of guessing
    208                 count = min(int(e.args[2]), bufsize)
    209                 f.size += count
    210                 print >>bench, 'TEMP_FAILURE_RETRY(read(%s, buf, %d));' % (handle, count)
    211                 nread += 1
    212 
    213         elif e.call == "write":
    214             fd, f, handle = extract_file(e, e.args[0])
    215             if handle in active:
    216                 # TODO: track actual file size instead of guessing
    217                 count = min(int(e.args[2]), bufsize)
    218                 f.size += count
    219                 print >>bench, 'TEMP_FAILURE_RETRY(write(%s, buf, %d));' % (handle, count)
    220                 nwrite += 1
    221 
    222         elif e.call == "pread64":
    223             fd, f, handle = extract_file(e, e.args[0])
    224             if handle in active:
    225                 f.size = max(f.size, int(e.args[2]) + int(e.args[3]))
    226                 count = min(int(e.args[2]), bufsize)
    227                 print >>bench, 'TEMP_FAILURE_RETRY(pread(%s, buf, %d, %s));' % (handle, count, e.args[3])
    228                 nread += 1
    229 
    230         elif e.call == "pwrite64":
    231             fd, f, handle = extract_file(e, e.args[0])
    232             if handle in active:
    233                 f.size = max(f.size, int(e.args[2]) + int(e.args[3]))
    234                 count = min(int(e.args[2]), bufsize)
    235                 print >>bench, 'TEMP_FAILURE_RETRY(pwrite(%s, buf, %d, %s));' % (handle, count, e.args[3])
    236                 nwrite += 1
    237 
    238         elif e.call == "fsync":
    239             fd, f, handle = extract_file(e, e.args[0])
    240             if handle in active:
    241                 print >>bench, 'TEMP_FAILURE_RETRY(fsync(%s));' % (handle)
    242                 nsync += 1
    243 
    244         elif e.call == "fdatasync":
    245             fd, f, handle = extract_file(e, e.args[0])
    246             if handle in active:
    247                 print >>bench, 'TEMP_FAILURE_RETRY(fdatasync(%s));' % (handle)
    248                 nsync += 1
    249 
    250         elif e.call == "mmap2":
    251             fd, f, handle = extract_file(e, e.args[4])
    252             if handle in active:
    253                 count = min(int(e.args[1]), bufsize)
    254                 offset = int(e.args[5], 0)
    255                 f.size = max(f.size, count + offset)
    256                 print >>bench, 'TEMP_FAILURE_RETRY(pread(%s, buf, %s, %s)); // mmap2' % (handle, count, offset)
    257                 nread += 1
    258 
    259     for handle in active:
    260         print >>bench, 'close(%s);' % (handle)
    261 
    262     print >>bench, """
    263 free(buf);
    264 return 0;
    265 }
    266 
    267 static status_t CreateFile(const char* name, int len) {
    268     int chunk = std::min(len, 65536);
    269     int out = -1;
    270     std::string buf;
    271 
    272     if (android::vold::ReadRandomBytes(chunk, buf) != OK) {
    273         LOG(ERROR) << "Failed to read random data";
    274         return -EIO;
    275     }
    276     if ((out = TEMP_FAILURE_RETRY(open(name, O_WRONLY|O_CREAT|O_TRUNC))) < 0) {
    277         PLOG(ERROR) << "Failed to open " << name;
    278         return -errno;
    279     }
    280 
    281     while (len > 0) {
    282         int n = write(out, buf.c_str(), std::min(len, chunk));
    283         if (n < 0) {
    284             PLOG(ERROR) << "Failed to write";
    285             close(out);
    286             return -errno;
    287         }
    288         len -= n;
    289     }
    290 
    291     close(out);
    292     return OK;
    293 }
    294 
    295 static status_t BenchmarkCreate() {
    296 status_t res = 0;
    297 res |= CreateFile("stub", 0);
    298 """
    299     for f in files.values():
    300         print >>bench, 'res |= CreateFile("file%s", %d);' % (f.ident, f.size)
    301 
    302     print >>bench, """
    303 return res;
    304 }
    305 
    306 static status_t BenchmarkDestroy() {
    307 status_t res = 0;
    308 res |= unlink("stub");
    309 """
    310     for f in files.values():
    311         print >>bench, 'res |= unlink("file%s");' % (f.ident)
    312 
    313     print >>bench, """
    314 return res;
    315 }
    316 
    317 static std::string BenchmarkIdent() {"""
    318     print >>bench, """return "r%d:w%d:s%d";""" % (nread, nwrite, nsync)
    319     print >>bench, """}
    320 
    321 }  // namespace vold
    322 }  // namespace android
    323 """
    324 
    325 
    326 size = sum([ f.size for f in files.values() ])
    327 print "Found", len(files), "data files accessed, total size", (size/1024), "kB"
    328 
    329 types = defaultdict(int)
    330 for e in events:
    331     types[e.call] += 1
    332 
    333 print "Found syscalls:"
    334 for t, n in types.iteritems():
    335     print str(n).rjust(8), t
    336 
    337 print
    338