1 /* 2 * mem-memcpy.c 3 * 4 * memcpy: Simple memory copy in various ways 5 * 6 * Written by Hitoshi Mitake <mitake (at) dcl.info.waseda.ac.jp> 7 */ 8 #include <ctype.h> 9 10 #include "../perf.h" 11 #include "../util/util.h" 12 #include "../util/parse-options.h" 13 #include "../util/header.h" 14 #include "bench.h" 15 #include "mem-memcpy-arch.h" 16 17 #include <stdio.h> 18 #include <stdlib.h> 19 #include <string.h> 20 #include <sys/time.h> 21 #include <errno.h> 22 23 #define K 1024 24 25 static const char *length_str = "1MB"; 26 static const char *routine = "default"; 27 static bool use_clock; 28 static int clock_fd; 29 static bool only_prefault; 30 static bool no_prefault; 31 32 static const struct option options[] = { 33 OPT_STRING('l', "length", &length_str, "1MB", 34 "Specify length of memory to copy. " 35 "available unit: B, MB, GB (upper and lower)"), 36 OPT_STRING('r', "routine", &routine, "default", 37 "Specify routine to copy"), 38 OPT_BOOLEAN('c', "clock", &use_clock, 39 "Use CPU clock for measuring"), 40 OPT_BOOLEAN('o', "only-prefault", &only_prefault, 41 "Show only the result with page faults before memcpy()"), 42 OPT_BOOLEAN('n', "no-prefault", &no_prefault, 43 "Show only the result without page faults before memcpy()"), 44 OPT_END() 45 }; 46 47 typedef void *(*memcpy_t)(void *, const void *, size_t); 48 49 struct routine { 50 const char *name; 51 const char *desc; 52 memcpy_t fn; 53 }; 54 55 struct routine routines[] = { 56 { "default", 57 "Default memcpy() provided by glibc", 58 memcpy }, 59 #ifdef ARCH_X86_64 60 61 #define MEMCPY_FN(fn, name, desc) { name, desc, fn }, 62 #include "mem-memcpy-x86-64-asm-def.h" 63 #undef MEMCPY_FN 64 65 #endif 66 67 { NULL, 68 NULL, 69 NULL } 70 }; 71 72 static const char * const bench_mem_memcpy_usage[] = { 73 "perf bench mem memcpy <options>", 74 NULL 75 }; 76 77 static struct perf_event_attr clock_attr = { 78 .type = PERF_TYPE_HARDWARE, 79 .config = PERF_COUNT_HW_CPU_CYCLES 80 }; 81 82 static void init_clock(void) 83 { 84 clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); 85 86 if (clock_fd < 0 && errno == ENOSYS) 87 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 88 else 89 BUG_ON(clock_fd < 0); 90 } 91 92 static u64 get_clock(void) 93 { 94 int ret; 95 u64 clk; 96 97 ret = read(clock_fd, &clk, sizeof(u64)); 98 BUG_ON(ret != sizeof(u64)); 99 100 return clk; 101 } 102 103 static double timeval2double(struct timeval *ts) 104 { 105 return (double)ts->tv_sec + 106 (double)ts->tv_usec / (double)1000000; 107 } 108 109 static void alloc_mem(void **dst, void **src, size_t length) 110 { 111 *dst = zalloc(length); 112 if (!dst) 113 die("memory allocation failed - maybe length is too large?\n"); 114 115 *src = zalloc(length); 116 if (!src) 117 die("memory allocation failed - maybe length is too large?\n"); 118 } 119 120 static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) 121 { 122 u64 clock_start = 0ULL, clock_end = 0ULL; 123 void *src = NULL, *dst = NULL; 124 125 alloc_mem(&src, &dst, len); 126 127 if (prefault) 128 fn(dst, src, len); 129 130 clock_start = get_clock(); 131 fn(dst, src, len); 132 clock_end = get_clock(); 133 134 free(src); 135 free(dst); 136 return clock_end - clock_start; 137 } 138 139 static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) 140 { 141 struct timeval tv_start, tv_end, tv_diff; 142 void *src = NULL, *dst = NULL; 143 144 alloc_mem(&src, &dst, len); 145 146 if (prefault) 147 fn(dst, src, len); 148 149 gettimeofday(&tv_start, NULL); 150 fn(dst, src, len); 151 gettimeofday(&tv_end, NULL); 152 153 timersub(&tv_end, &tv_start, &tv_diff); 154 155 free(src); 156 free(dst); 157 return (double)((double)len / timeval2double(&tv_diff)); 158 } 159 160 #define pf (no_prefault ? 0 : 1) 161 162 #define print_bps(x) do { \ 163 if (x < K) \ 164 printf(" %14lf B/Sec", x); \ 165 else if (x < K * K) \ 166 printf(" %14lfd KB/Sec", x / K); \ 167 else if (x < K * K * K) \ 168 printf(" %14lf MB/Sec", x / K / K); \ 169 else \ 170 printf(" %14lf GB/Sec", x / K / K / K); \ 171 } while (0) 172 173 int bench_mem_memcpy(int argc, const char **argv, 174 const char *prefix __used) 175 { 176 int i; 177 size_t len; 178 double result_bps[2]; 179 u64 result_clock[2]; 180 181 argc = parse_options(argc, argv, options, 182 bench_mem_memcpy_usage, 0); 183 184 if (use_clock) 185 init_clock(); 186 187 len = (size_t)perf_atoll((char *)length_str); 188 189 result_clock[0] = result_clock[1] = 0ULL; 190 result_bps[0] = result_bps[1] = 0.0; 191 192 if ((s64)len <= 0) { 193 fprintf(stderr, "Invalid length:%s\n", length_str); 194 return 1; 195 } 196 197 /* same to without specifying either of prefault and no-prefault */ 198 if (only_prefault && no_prefault) 199 only_prefault = no_prefault = false; 200 201 for (i = 0; routines[i].name; i++) { 202 if (!strcmp(routines[i].name, routine)) 203 break; 204 } 205 if (!routines[i].name) { 206 printf("Unknown routine:%s\n", routine); 207 printf("Available routines...\n"); 208 for (i = 0; routines[i].name; i++) { 209 printf("\t%s ... %s\n", 210 routines[i].name, routines[i].desc); 211 } 212 return 1; 213 } 214 215 if (bench_format == BENCH_FORMAT_DEFAULT) 216 printf("# Copying %s Bytes ...\n\n", length_str); 217 218 if (!only_prefault && !no_prefault) { 219 /* show both of results */ 220 if (use_clock) { 221 result_clock[0] = 222 do_memcpy_clock(routines[i].fn, len, false); 223 result_clock[1] = 224 do_memcpy_clock(routines[i].fn, len, true); 225 } else { 226 result_bps[0] = 227 do_memcpy_gettimeofday(routines[i].fn, 228 len, false); 229 result_bps[1] = 230 do_memcpy_gettimeofday(routines[i].fn, 231 len, true); 232 } 233 } else { 234 if (use_clock) { 235 result_clock[pf] = 236 do_memcpy_clock(routines[i].fn, 237 len, only_prefault); 238 } else { 239 result_bps[pf] = 240 do_memcpy_gettimeofday(routines[i].fn, 241 len, only_prefault); 242 } 243 } 244 245 switch (bench_format) { 246 case BENCH_FORMAT_DEFAULT: 247 if (!only_prefault && !no_prefault) { 248 if (use_clock) { 249 printf(" %14lf Clock/Byte\n", 250 (double)result_clock[0] 251 / (double)len); 252 printf(" %14lf Clock/Byte (with prefault)\n", 253 (double)result_clock[1] 254 / (double)len); 255 } else { 256 print_bps(result_bps[0]); 257 printf("\n"); 258 print_bps(result_bps[1]); 259 printf(" (with prefault)\n"); 260 } 261 } else { 262 if (use_clock) { 263 printf(" %14lf Clock/Byte", 264 (double)result_clock[pf] 265 / (double)len); 266 } else 267 print_bps(result_bps[pf]); 268 269 printf("%s\n", only_prefault ? " (with prefault)" : ""); 270 } 271 break; 272 case BENCH_FORMAT_SIMPLE: 273 if (!only_prefault && !no_prefault) { 274 if (use_clock) { 275 printf("%lf %lf\n", 276 (double)result_clock[0] / (double)len, 277 (double)result_clock[1] / (double)len); 278 } else { 279 printf("%lf %lf\n", 280 result_bps[0], result_bps[1]); 281 } 282 } else { 283 if (use_clock) { 284 printf("%lf\n", (double)result_clock[pf] 285 / (double)len); 286 } else 287 printf("%lf\n", result_bps[pf]); 288 } 289 break; 290 default: 291 /* reaching this means there's some disaster: */ 292 die("unknown format: %d\n", bench_format); 293 break; 294 } 295 296 return 0; 297 } 298