Home | History | Annotate | Download | only in scripts
      1 #!/bin/bash
      2 
      3 ########################
      4 # Function definitions #
      5 ########################
      6 
      7 source "$(dirname $0)/measurement-functions"
      8 
      9 function run_test {
     10   local tmp avg1 stddev1 avg2 stddev2 avg4 stddev4 p
     11 
     12   tmp="/tmp/test-timing.$$"
     13 
     14   rm -f "${tmp}"
     15   p=1
     16   test_output="${1}-p${p}.out" measure_runtime "$@" -p${psep}${p} "${test_args}" | avgstddev > "$tmp"
     17   read avg1 stddev1 vsz1 vszdev1 rest < "$tmp"
     18   echo "Average time: ${avg1} +/- ${stddev1} seconds." \
     19        " VSZ: ${vsz1} +/- ${vszdev1} KB"
     20 
     21   if [ "${rest}" != "" ]; then
     22     echo "Internal error ($rest)"
     23     exit 1
     24   fi
     25 
     26   rm -f "${tmp}"
     27   p=2
     28   test_output="${1}-p${p}.out" measure_runtime "$@" -p${psep}${p} "${test_args}" | avgstddev > "$tmp"
     29   read avg2 stddev2 vsz2 vszdev2 rest < "$tmp"
     30   echo "Average time: ${avg2} +/- ${stddev2} seconds." \
     31        " VSZ: ${vsz2} +/- ${vszdev2} KB"
     32 
     33   if [ "${rest}" != "" ]; then
     34     echo "Internal error ($rest)"
     35     exit 1
     36   fi
     37 
     38   rm -f "${tmp}"
     39   p=4
     40   test_output="${1}-p${p}.out" measure_runtime "$@" -p${psep}${p} "${test_args}" | avgstddev > "$tmp"
     41   read avg4 stddev4 vsz4 vszdev4 rest < "$tmp"
     42   echo "Average time: ${avg4} +/- ${stddev4} seconds." \
     43        " VSZ: ${vsz4} +/- ${vszdev4} KB"
     44   rm -f "$tmp"
     45 
     46   if [ "${rest}" != "" ]; then
     47     echo "Internal error ($rest)"
     48     exit 1
     49   fi
     50 
     51   p=1
     52   test_output="/dev/null" \
     53   print_runtime_ratio "${avg1}" "${stddev1}" "${vsz1}" "${vszdev1}" "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"
     54 
     55   p=4
     56   test_output="/dev/null" \
     57   print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"
     58 
     59   p=4
     60   test_output="${1}-drd-with-stack-var-4.out" \
     61   print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
     62     "$VG" --tool=drd --first-race-only=yes --check-stack-var=yes \
     63     --drd-stats=yes "$@" -p${psep}${p} "${test_args}"
     64 
     65   p=4
     66   test_output="${1}-drd-without-stack-var-4.out" \
     67   print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
     68     "$VG" --tool=drd --first-race-only=yes --check-stack-var=no \
     69     --drd-stats=yes "$@" -p${psep}${p} "${test_args}"
     70 
     71   p=4
     72   test_output="${1}-helgrind-4-none.out" \
     73   print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" "$VG" --tool=helgrind --history-level=none "$@" -p${psep}${p} "${test_args}"
     74 
     75   p=4
     76   test_output="${1}-helgrind-4-approx.out" \
     77   print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" "$VG" --tool=helgrind --history-level=approx "$@" -p${psep}${p} "${test_args}"
     78 
     79   p=4
     80   test_output="${1}-helgrind-4-full.out" \
     81   print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" "$VG" --tool=helgrind --history-level=full "$@" -p${psep}${p} "${test_args}"
     82 
     83   echo ''
     84 }
     85 
     86 
     87 ########################
     88 # Script body          #
     89 ########################
     90 
     91 DRD_SCRIPTS_DIR="$(dirname $0)"
     92 if [ "${DRD_SCRIPTS_DIR:0:1}" != "/" ]; then
     93   DRD_SCRIPTS_DIR="$PWD/$DRD_SCRIPTS_DIR"
     94 fi
     95 
     96 SPLASH2="${DRD_SCRIPTS_DIR}/../splash2"
     97 if [ ! -e "${SPLASH2}" ]; then
     98   echo "Error: splash2 directory not found (${SPLASH2})."
     99   exit 1
    100 fi
    101 
    102 if [ "$VG" = "" ]; then
    103   VG="${DRD_SCRIPTS_DIR}/../../vg-in-place"
    104 fi
    105 
    106 if [ ! -e "$VG" ]; then
    107   echo "Could not find $VG."
    108   exit 1
    109 fi
    110 
    111 ######################################################################################################################
    112 # Meaning of the different columns:
    113 #  1. SPLASH2 test name.
    114 #  2. Execution time in seconds for native run with argument -p1.
    115 #  3. Virtual memory size in KB for the native run with argument -p1.
    116 #  4. Execution time in seconds for native run with argument -p2.
    117 #  5. Virtual memory size in KB for the native run with argument -p2.
    118 #  6. Execution time in seconds for native run with argument -p4.
    119 #  7. Virtual memory size in KB for the native run with argument -p4.
    120 #  8. Execution time ratio for --tool=none -p1 versus -p1.
    121 #  9. Virtual memory size ratio for --tool=none -p1 versus -p1.
    122 # 10. Execution time ratio for --tool=none -p4 versus -p4.
    123 # 11. Virtual memory size ratio for --tool=none -p4 versus -p4.
    124 # 12. Execution time ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
    125 # 13. Virtual memory size ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
    126 # 14. Execution time ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
    127 # 15. Virtual memory size ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
    128 # 16. Execution time ratio for --tool=helgrind --history-level=none -p4 versus -p4.
    129 # 17. Virtual memory size ratio for --tool=helgrind --history-level=none -p4 versus -p4.
    130 # 18. Execution time ratio for --tool=helgrind --history-level=approx -p4 versus -p4.
    131 # 19. Virtual memory size ratio for --tool=helgrind --history-level=approx -p4 versus -p4.
    132 # 20. Execution time ratio for --tool=helgrind --history-level=full -p4 versus -p4.
    133 # 21. Virtual memory size ratio for --tool=helgrind --history-level=full -p4 versus -p4.
    134 # 22. Execution time ratio for Intel Thread Checker -p4 versus -p4.
    135 # 23. Execution time ratio for Intel Thread Checker with filtering (-p4+f) versus -p4.
    136 #
    137 # Notes:
    138 # - Both Helgrind and DRD use a granularity of one byte for data race detection.
    139 # - Helgrind does detect data races on stack variables. DRD only detects
    140 #   data races on stack variables with --check-stack-var=yes.
    141 # - The ITC tests have been run on a 4-way 2.5 GHz Pentium 4 workstation, most
    142 #   likely running a 32-bit OS. Not yet clear to me: which OS? Which
    143 #   granularity does ITC use? And which m4 macros have been used by ITC as
    144 #   the implementation of the synchronization primitives?
    145 #
    146 #     1                    2     3      4      5      6     7     8    9   10   11   12  13     14  15    16    17  18    19  20    21   22   23
    147 ################################################################################################################################################
    148 # Results:                native       native       native       none      none       DRD        DRD      HG        HG        HG         ITC ITC
    149 #                         -p1          -p2          -p4           -p1       -p4       -p4       -p4+f     -p4       -p4       -p4      -p4 -p4+f
    150 # ..............................................................................................................................................
    151 # Cholesky                0.11  12016  0.06  22016  0.55  41328 10.3  4.92  1.7 2.14   15 2.61    8 2.61   10  3.96  10  3.96  15  6.14  239  82
    152 # FFT                     0.02   6692  0.02  14888  0.02  31621 17.0  8.01 20.0 2.48  114 3.15   64 3.28   81  4.52  81  4.52 116  5.56   90  41
    153 # LU, contiguous          0.08   4100  0.05  12304  0.06  28712 11.1 12.44 18.5 2.64  104 3.18   70 3.18   87  4.84  89  4.84 118  5.55  428 128
    154 # Ocean, contiguous       0.23  16848  0.19  25384  0.23  42528  6.3  3.78  8.3 2.11   87 2.82   62 4.02   71  3.75  71  3.75 195  5.96   90  28
    155 # Radix                   0.21  15136  0.14  23336  0.15  39728 12.6  4.10 22.3 2.19   61 2.87   41 2.94   52  4.03  52  4.03  85  6.13  222  56
    156 # Raytrace                0.63 207104  0.49 215296  0.49 231680  8.9  1.23 12.9 1.20  385 1.38   86 2.10  158  3.70 160  3.70 222  4.15  172  53
    157 # Water-n2                0.18  10696  0.09  27072  0.11  59832 12.5  5.46 26.7 1.80 3092 3.03  263 3.06   92  3.28  92  3.28  92  3.55  189  39
    158 # Water-sp                0.20   4444  0.15  13536  0.10  30269 10.6 11.56 27.0 2.52  405 3.29   69 3.42   95  4.59  95  4.59  97  4.73  183  34
    159 # ..............................................................................................................................................
    160 # geometric mean          0.14  13024  0.10  25669  0.14  47655 10.8  5.26 13.5 2.08  161 2.71   59 3.03   66  4.05  66  4.05  95  5.13  180  51
    161 # ..............................................................................................................................................
    162 # Hardware: dual-core Intel Core2 Duo E6750, 2.66 GHz, 4 MB L2 cache, 2 GB RAM.                                                        
    163 # Software: openSUSE 11.0 (64-bit edition), runlevel 3, kernel 2.6.30.1, gcc 4.3.1, 32 bit SPLASH-2 executables, valgrind trunk r10648.
    164 ################################################################################################################################################
    165 
    166 ####
    167 # Notes:
    168 # - The ITC performance numbers in the above table originate from table 1 in
    169 #   the following paper:
    170 #   Paul Sack, Brian E. Bliss, Zhiqiang Ma, Paul Petersen, Josep Torrellas,
    171 #   Accurate and efficient filtering for the Intel thread checker race
    172 #   detector, Proceedings of the 1st workshop on Architectural and system
    173 #   support for improving software dependability, San Jose, California,
    174 #   2006. Pages: 34 - 41.
    175 # - The input parameters for benchmarks below originate from table 1 in the
    176 #   following paper:
    177 #   The SPLASH-2 programs: characterization and methodological considerations
    178 #   Woo, S.C.; Ohara, M.; Torrie, E.; Singh, J.P.; Gupta, A.
    179 #   1995. Proceedings of the 22nd Annual International Symposium on Computer
    180 #   Architecture, 22-24 Jun 1995, Page(s): 24 - 36.
    181 #   ftp://www-flash.stanford.edu/pub/splash2/splash2_isca95.ps.Z
    182 ####
    183 
    184 cache_size=$(get_cache_size)
    185 log2_cache_size=$(log2 ${cache_size})
    186 
    187 # Cholesky
    188 (
    189   cd ${SPLASH2}/codes/kernels/cholesky/inputs
    190   for f in *Z
    191   do
    192     gzip -cd <$f >${f%.Z}
    193   done
    194   test_args=tk15.O run_test ../CHOLESKY -C$((cache_size))
    195 )
    196 
    197 # FFT
    198 run_test ${SPLASH2}/codes/kernels/fft/FFT -t -l$((log2_cache_size/2)) -m16
    199 
    200 # LU, contiguous blocks.
    201 run_test ${SPLASH2}/codes/kernels/lu/contiguous_blocks/LU -n512
    202 
    203 # LU, non-contiguous blocks.
    204 #run_test ${SPLASH2}/codes/kernels/lu/non_contiguous_blocks/LU -n512
    205 
    206 # Ocean
    207 run_test ${SPLASH2}/codes/apps/ocean/contiguous_partitions/OCEAN -n258
    208 #run_test ${SPLASH2}/codes/apps/ocean/non_contiguous_partitions/OCEAN -n258
    209 
    210 # Radiosity. Runs fine on a 32-bit OS, but deadlocks on a 64-bit OS. Not clear to me why.
    211 if [ $(uname -p) = "i686" ]; then
    212   psep=' ' run_test ${SPLASH2}/codes/apps/radiosity/RADIOSITY -batch -room -ae 5000.0 -en 0.050 -bf 0.10
    213 fi
    214 
    215 # Radix
    216 run_test ${SPLASH2}/codes/kernels/radix/RADIX -n$((2**20)) -r1024
    217 
    218 # Raytrace
    219 (
    220   cd ${SPLASH2}/codes/apps/raytrace/inputs
    221   rm -f *.env *.geo *.rl
    222   for f in *Z
    223   do
    224     gzip -cd <$f >${f%.Z}
    225   done
    226   cd ..
    227   test_args=inputs/car.env psep='' run_test ./RAYTRACE -m64
    228 )
    229 
    230 # Water-n2
    231 (
    232   cd ${SPLASH2}/codes/apps/water-nsquared
    233   test_input=${DRD_SCRIPTS_DIR}/run-splash2-water-input psep=' ' run_test ./WATER-NSQUARED
    234 )
    235 
    236 # Water-sp
    237 (
    238   cd ${SPLASH2}/codes/apps/water-spatial
    239   test_input=${DRD_SCRIPTS_DIR}/run-splash2-water-input psep=' ' run_test ./WATER-SPATIAL
    240 )
    241 
    242 
    243 
    244 # Local variables:
    245 # compile-command: "./run-splash2"
    246 # End:
    247