Home | History | Annotate | Download | only in iozone
      1 #!/usr/bin/python
      2 """
Postprocessing module for IOzone. It is capable of picking results from an
IOzone run, calculating the geometric mean of all throughput results for
a given file size or record size, and then generating a series of 2D and 3D
graphs. The graph generation functionality depends on gnuplot, and if it
is not present, functionality degrades gracefully.
      8 
      9 @copyright: Red Hat 2010
     10 """
     11 import os, sys, optparse, logging, math, time
     12 import common
     13 from autotest_lib.client.common_lib import logging_config, logging_manager
     14 from autotest_lib.client.common_lib import error
     15 from autotest_lib.client.bin import utils, os_dep
     16 
     17 
     18 _LABELS = ['file_size', 'record_size', 'write', 'rewrite', 'read', 'reread',
     19            'randread', 'randwrite', 'bkwdread', 'recordrewrite', 'strideread',
     20            'fwrite', 'frewrite', 'fread', 'freread']
     21 
     22 
     23 def unique(list):
     24     """
     25     Return a list of the elements in list, but without duplicates.
     26 
     27     @param list: List with values.
     28     @return: List with non duplicate elements.
     29     """
     30     n = len(list)
     31     if n == 0:
     32         return []
     33     u = {}
     34     try:
     35         for x in list:
     36             u[x] = 1
     37     except TypeError:
     38         return None
     39     else:
     40         return u.keys()
     41 
     42 
     43 def geometric_mean(values):
     44     """
     45     Evaluates the geometric mean for a list of numeric values.
     46 
     47     @param values: List with values.
     48     @return: Single value representing the geometric mean for the list values.
     49     @see: http://en.wikipedia.org/wiki/Geometric_mean
     50     """
     51     try:
     52         values = [int(value) for value in values]
     53     except ValueError:
     54         return None
     55     product = 1
     56     n = len(values)
     57     if n == 0:
     58         return None
     59     return math.exp(sum([math.log(x) for x in values])/n)
     60 
     61 
     62 def compare_matrices(matrix1, matrix2, treshold=0.05):
     63     """
     64     Compare 2 matrices nxm and return a matrix nxm with comparison data
     65 
     66     @param matrix1: Reference Matrix with numeric data
     67     @param matrix2: Matrix that will be compared
     68     @param treshold: Any difference bigger than this percent treshold will be
     69             reported.
     70     """
     71     improvements = 0
     72     regressions = 0
     73     same = 0
     74     comparison_matrix = []
     75 
     76     new_matrix = []
     77     for line1, line2 in zip(matrix1, matrix2):
     78         new_line = []
     79         for element1, element2 in zip(line1, line2):
     80             ratio = float(element2) / float(element1)
     81             if ratio < (1 - treshold):
     82                 regressions += 1
     83                 new_line.append((100 * ratio - 1) - 100)
     84             elif ratio > (1 + treshold):
     85                 improvements += 1
     86                 new_line.append("+" + str((100 * ratio - 1) - 100))
     87             else:
     88                 same + 1
     89                 if line1.index(element1) == 0:
     90                     new_line.append(element1)
     91                 else:
     92                     new_line.append(".")
     93         new_matrix.append(new_line)
     94 
     95     total = improvements + regressions + same
     96 
     97     return (new_matrix, improvements, regressions, total)
     98 
     99 
    100 class IOzoneAnalyzer(object):
    101     """
    102     Analyze an unprocessed IOzone file, and generate the following types of
    103     report:
    104 
    105     * Summary of throughput for all file and record sizes combined
    106     * Summary of throughput for all file sizes
    107     * Summary of throughput for all record sizes
    108 
    109     If more than one file is provided to the analyzer object, a comparison
    110     between the two runs is made, searching for regressions in performance.
    111     """
    112     def __init__(self, list_files, output_dir):
    113         self.list_files = list_files
    114         if not os.path.isdir(output_dir):
    115             os.makedirs(output_dir)
    116         self.output_dir = output_dir
    117         logging.info("Results will be stored in %s", output_dir)
    118 
    119 
    120     def average_performance(self, results, size=None):
    121         """
    122         Flattens a list containing performance results.
    123 
    124         @param results: List of n lists containing data from performance runs.
    125         @param size: Numerical value of a size (say, file_size) that was used
    126                 to filter the original results list.
    127         @return: List with 1 list containing average data from the performance
    128                 run.
    129         """
    130         average_line = []
    131         if size is not None:
    132             average_line.append(size)
    133         for i in range(2, 15):
    134             average = geometric_mean([line[i] for line in results]) / 1024.0
    135             average = int(average)
    136             average_line.append(average)
    137         return average_line
    138 
    139 
    140     def process_results(self, results, label=None):
    141         """
    142         Process a list of IOzone results according to label.
    143 
    144         @label: IOzone column label that we'll use to filter and compute
    145                 geometric mean results, in practical term either 'file_size'
    146                 or 'record_size'.
    147         @result: A list of n x m columns with original iozone results.
    148         @return: A list of n-? x (m-1) columns with geometric averages for
    149                 values of each label (ex, average for all file_sizes).
    150         """
    151         performance = []
    152         if label is not None:
    153             index = _LABELS.index(label)
    154             sizes = unique([line[index] for line in results])
    155             sizes.sort()
    156             for size in sizes:
    157                 r_results = [line for line in results if line[index] == size]
    158                 performance.append(self.average_performance(r_results, size))
    159         else:
    160             performance.append(self.average_performance(results))
    161 
    162         return performance
    163 
    164 
    165     def parse_file(self, file):
    166         """
    167         Parse an IOzone results file.
    168 
    169         @param file: File object that will be parsed.
    170         @return: Matrix containing IOzone results extracted from the file.
    171         """
    172         lines = []
    173         for line in file.readlines():
    174             fields = line.split()
    175             if len(fields) != 15:
    176                 continue
    177             try:
    178                 lines.append([int(i) for i in fields])
    179             except ValueError:
    180                 continue
    181         return lines
    182 
    183 
    184     def report(self, overall_results, record_size_results, file_size_results):
    185         """
    186         Generates analysis data for IOZone run.
    187 
    188         Generates a report to both logs (where it goes with nice headers) and
    189         output files for further processing (graph generation).
    190 
    191         @param overall_results: 1x15 Matrix containing IOzone results for all
    192                 file sizes
    193         @param record_size_results: nx15 Matrix containing IOzone results for
    194                 each record size tested.
    195         @param file_size_results: nx15 Matrix containing file size results
    196                 for each file size tested.
    197         """
    198         # Here we'll use the logging system to put the output of our analysis
    199         # to files
    200         logger = logging.getLogger()
    201         formatter = logging.Formatter("")
    202 
    203         logging.info("")
    204         logging.info("TABLE:  SUMMARY of ALL FILE and RECORD SIZES                        Results in MB/sec")
    205         logging.info("")
    206         logging.info("FILE & RECORD  INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE  STRIDE    F       FRE     F       FRE")
    207         logging.info("SIZES (KB)     WRITE   WRITE   READ    READ    READ   WRITE    READ   WRITE    READ    WRITE   WRITE   READ    READ")
    208         logging.info("-------------------------------------------------------------------------------------------------------------------")
    209         for result_line in overall_results:
    210             logging.info("ALL            %-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line))
    211         logging.info("")
    212 
    213         logging.info("DRILLED DATA:")
    214 
    215         logging.info("")
    216         logging.info("TABLE:  RECORD Size against all FILE Sizes                          Results in MB/sec")
    217         logging.info("")
    218         logging.info("RECORD    INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE  STRIDE    F       FRE     F       FRE ")
    219         logging.info("SIZE (KB) WRITE   WRITE   READ    READ    READ   WRITE    READ   WRITE    READ    WRITE   WRITE   READ    READ")
    220         logging.info("--------------------------------------------------------------------------------------------------------------")
    221 
    222         foutput_path = os.path.join(self.output_dir, '2d-datasource-file')
    223         if os.path.isfile(foutput_path):
    224             os.unlink(foutput_path)
    225         foutput = logging.FileHandler(foutput_path)
    226         foutput.setFormatter(formatter)
    227         logger.addHandler(foutput)
    228         for result_line in record_size_results:
    229             logging.info("%-10s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line))
    230         logger.removeHandler(foutput)
    231 
    232         logging.info("")
    233 
    234         logging.info("")
    235         logging.info("TABLE:  FILE Size against all RECORD Sizes                          Results in MB/sec")
    236         logging.info("")
    237         logging.info("RECORD    INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE  STRIDE    F       FRE     F       FRE ")
    238         logging.info("SIZE (KB) WRITE   WRITE   READ    READ    READ   WRITE    READ   WRITE    READ    WRITE   WRITE   READ    READ")
    239         logging.info("--------------------------------------------------------------------------------------------------------------")
    240 
    241         routput_path = os.path.join(self.output_dir, '2d-datasource-record')
    242         if os.path.isfile(routput_path):
    243             os.unlink(routput_path)
    244         routput = logging.FileHandler(routput_path)
    245         routput.setFormatter(formatter)
    246         logger.addHandler(routput)
    247         for result_line in file_size_results:
    248             logging.info("%-10s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line))
    249         logger.removeHandler(routput)
    250 
    251         logging.info("")
    252 
    253 
    254     def report_comparison(self, record, file):
    255         """
    256         Generates comparison data for 2 IOZone runs.
    257 
    258         It compares 2 sets of nxm results and outputs a table with differences.
    259         If a difference higher or smaller than 5% is found, a warning is
    260         triggered.
    261 
    262         @param record: Tuple with 4 elements containing results for record size.
    263         @param file: Tuple with 4 elements containing results for file size.
    264         """
    265         (record_size, record_improvements, record_regressions,
    266          record_total) = record
    267         (file_size, file_improvements, file_regressions,
    268          file_total) = file
    269         logging.info("ANALYSIS of DRILLED DATA:")
    270 
    271         logging.info("")
    272         logging.info("TABLE:  RECsize Difference between runs                            Results are % DIFF")
    273         logging.info("")
    274         logging.info("RECORD    INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE  STRIDE    F       FRE     F       FRE ")
    275         logging.info("SIZE (KB) WRITE   WRITE   READ    READ    READ   WRITE    READ   WRITE    READ    WRITE   WRITE   READ    READ")
    276         logging.info("--------------------------------------------------------------------------------------------------------------")
    277         for result_line in record_size:
    278             logging.info("%-10s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s" % tuple(result_line))
    279         logging.info("REGRESSIONS: %d (%.2f%%)    Improvements: %d (%.2f%%)",
    280                      record_regressions,
    281                      (100 * record_regressions/float(record_total)),
    282                      record_improvements,
    283                      (100 * record_improvements/float(record_total)))
    284         logging.info("")
    285 
    286         logging.info("")
    287         logging.info("TABLE:  FILEsize Difference between runs                           Results are % DIFF")
    288         logging.info("")
    289         logging.info("RECORD    INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE  STRIDE    F       FRE     F       FRE ")
    290         logging.info("SIZE (KB) WRITE   WRITE   READ    READ    READ   WRITE    READ   WRITE    READ    WRITE   WRITE   READ    READ")
    291         logging.info("--------------------------------------------------------------------------------------------------------------")
    292         for result_line in file_size:
    293             logging.info("%-10s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s" % tuple(result_line))
    294         logging.info("REGRESSIONS: %d (%.2f%%)    Improvements: %d (%.2f%%)",
    295                      file_regressions,
    296                      (100 * file_regressions/float(file_total)),
    297                      file_improvements,
    298                      (100 * file_improvements/float(file_total)))
    299         logging.info("")
    300 
    301 
    302     def analyze(self):
    303         """
    304         Analyzes and eventually compares sets of IOzone data.
    305         """
    306         overall = []
    307         record_size = []
    308         file_size = []
    309         for path in self.list_files:
    310             file = open(path, 'r')
    311             logging.info('FILE: %s', path)
    312 
    313             results = self.parse_file(file)
    314 
    315             overall_results = self.process_results(results)
    316             record_size_results = self.process_results(results, 'record_size')
    317             file_size_results = self.process_results(results, 'file_size')
    318             self.report(overall_results, record_size_results, file_size_results)
    319 
    320             if len(self.list_files) == 2:
    321                 overall.append(overall_results)
    322                 record_size.append(record_size_results)
    323                 file_size.append(file_size_results)
    324 
    325         if len(self.list_files) == 2:
    326             record_comparison = compare_matrices(*record_size)
    327             file_comparison = compare_matrices(*file_size)
    328             self.report_comparison(record_comparison, file_comparison)
    329 
    330 
    331 class IOzonePlotter(object):
    332     """
    333     Plots graphs based on the results of an IOzone run.
    334 
    335     Plots graphs based on the results of an IOzone run. Uses gnuplot to
    336     generate the graphs.
    337     """
    338     def __init__(self, results_file, output_dir):
    339         self.active = True
    340         try:
    341             self.gnuplot = os_dep.command("gnuplot")
    342         except:
    343             logging.error("Command gnuplot not found, disabling graph "
    344                           "generation")
    345             self.active = False
    346 
    347         if not os.path.isdir(output_dir):
    348             os.makedirs(output_dir)
    349         self.output_dir = output_dir
    350 
    351         if not os.path.isfile(results_file):
    352             logging.error("Invalid file %s provided, disabling graph "
    353                           "generation", results_file)
    354             self.active = False
    355             self.results_file = None
    356         else:
    357             self.results_file = results_file
    358             self.generate_data_source()
    359 
    360 
    361     def generate_data_source(self):
    362         """
    363         Creates data file without headers for gnuplot consumption.
    364         """
    365         results_file = open(self.results_file, 'r')
    366         self.datasource = os.path.join(self.output_dir, '3d-datasource')
    367         datasource = open(self.datasource, 'w')
    368         for line in results_file.readlines():
    369             fields = line.split()
    370             if len(fields) != 15:
    371                 continue
    372             try:
    373                 values = [int(i) for i in fields]
    374                 datasource.write(line)
    375             except ValueError:
    376                 continue
    377         datasource.close()
    378 
    379 
    380     def plot_2d_graphs(self):
    381         """
    382         For each one of the throughput parameters, generate a set of gnuplot
    383         commands that will create a parametric surface with file size vs.
    384         record size vs. throughput.
    385         """
    386         datasource_2d = os.path.join(self.output_dir, '2d-datasource-file')
    387         for index, label in zip(range(2, 15), _LABELS[2:]):
    388             commands_path = os.path.join(self.output_dir, '2d-%s.do' % label)
    389             commands = ""
    390             commands += "set title 'Iozone performance: %s'\n" % label
    391             commands += "set logscale x\n"
    392             commands += "set xlabel 'File size (KB)'\n"
    393             commands += "set ylabel 'Througput (MB/s)'\n"
    394             commands += "set terminal png small size 450 350\n"
    395             commands += "set output '%s'\n" % os.path.join(self.output_dir,
    396                                                            '2d-%s.png' % label)
    397             commands += ("plot '%s' using 1:%s title '%s' with lines \n" %
    398                          (datasource_2d, index, label))
    399             commands_file = open(commands_path, 'w')
    400             commands_file.write(commands)
    401             commands_file.close()
    402             try:
    403                 utils.system("%s %s" % (self.gnuplot, commands_path))
    404             except error.CmdError:
    405                 logging.error("Problem plotting from commands file %s",
    406                               commands_path)
    407 
    408 
    409     def plot_3d_graphs(self):
    410         """
    411         For each one of the throughput parameters, generate a set of gnuplot
    412         commands that will create a parametric surface with file size vs.
    413         record size vs. throughput.
    414         """
    415         for index, label in zip(range(1, 14), _LABELS[2:]):
    416             commands_path = os.path.join(self.output_dir, '%s.do' % label)
    417             commands = ""
    418             commands += "set title 'Iozone performance: %s'\n" % label
    419             commands += "set grid lt 2 lw 1\n"
    420             commands += "set surface\n"
    421             commands += "set parametric\n"
    422             commands += "set xtics\n"
    423             commands += "set ytics\n"
    424             commands += "set logscale x 2\n"
    425             commands += "set logscale y 2\n"
    426             commands += "set logscale z\n"
    427             commands += "set xrange [2.**5:2.**24]\n"
    428             commands += "set xlabel 'File size (KB)'\n"
    429             commands += "set ylabel 'Record size (KB)'\n"
    430             commands += "set zlabel 'Througput (KB/s)'\n"
    431             commands += "set data style lines\n"
    432             commands += "set dgrid3d 80,80, 3\n"
    433             commands += "set terminal png small size 900 700\n"
    434             commands += "set output '%s'\n" % os.path.join(self.output_dir,
    435                                                            '%s.png' % label)
    436             commands += ("splot '%s' using 1:2:%s title '%s'\n" %
    437                          (self.datasource, index, label))
    438             commands_file = open(commands_path, 'w')
    439             commands_file.write(commands)
    440             commands_file.close()
    441             try:
    442                 utils.system("%s %s" % (self.gnuplot, commands_path))
    443             except error.CmdError:
    444                 logging.error("Problem plotting from commands file %s",
    445                               commands_path)
    446 
    447 
    448     def plot_all(self):
    449         """
    450         Plot all graphs that are to be plotted, provided that we have gnuplot.
    451         """
    452         if self.active:
    453             self.plot_2d_graphs()
    454             self.plot_3d_graphs()
    455 
    456 
    457 class AnalyzerLoggingConfig(logging_config.LoggingConfig):
    458     def configure_logging(self, results_dir=None, verbose=False):
    459         super(AnalyzerLoggingConfig, self).configure_logging(use_console=True,
    460                                                         verbose=verbose)
    461 
    462 
    463 if __name__ == "__main__":
    464     parser = optparse.OptionParser("usage: %prog [options] [filenames]")
    465     options, args = parser.parse_args()
    466 
    467     logging_manager.configure_logging(AnalyzerLoggingConfig())
    468 
    469     if args:
    470         filenames = args
    471     else:
    472         parser.print_help()
    473         sys.exit(1)
    474 
    475     if len(args) > 2:
    476         parser.print_help()
    477         sys.exit(1)
    478 
    479     o = os.path.join(os.getcwd(),
    480                      "iozone-graphs-%s" % time.strftime('%Y-%m-%d-%H.%M.%S'))
    481     if not os.path.isdir(o):
    482         os.makedirs(o)
    483 
    484     a = IOzoneAnalyzer(list_files=filenames, output_dir=o)
    485     a.analyze()
    486     p = IOzonePlotter(results_file=filenames[0], output_dir=o)
    487     p.plot_all()
    488