Home | History | Annotate | Download | only in bench
      1 /*
      2  * mem-memcpy.c
      3  *
      4  * memcpy: Simple memory copy in various ways
      5  *
      6  * Written by Hitoshi Mitake <mitake (at) dcl.info.waseda.ac.jp>
      7  */
      8 #include <ctype.h>
      9 
     10 #include "../perf.h"
     11 #include "../util/util.h"
     12 #include "../util/parse-options.h"
     13 #include "../util/header.h"
     14 #include "bench.h"
     15 #include "mem-memcpy-arch.h"
     16 
     17 #include <stdio.h>
     18 #include <stdlib.h>
     19 #include <string.h>
     20 #include <sys/time.h>
     21 #include <errno.h>
     22 
     23 #define K 1024
     24 
     25 static const char	*length_str	= "1MB";
     26 static const char	*routine	= "default";
     27 static bool		use_clock;
     28 static int		clock_fd;
     29 static bool		only_prefault;
     30 static bool		no_prefault;
     31 
     32 static const struct option options[] = {
     33 	OPT_STRING('l', "length", &length_str, "1MB",
     34 		    "Specify length of memory to copy. "
     35 		    "available unit: B, MB, GB (upper and lower)"),
     36 	OPT_STRING('r', "routine", &routine, "default",
     37 		    "Specify routine to copy"),
     38 	OPT_BOOLEAN('c', "clock", &use_clock,
     39 		    "Use CPU clock for measuring"),
     40 	OPT_BOOLEAN('o', "only-prefault", &only_prefault,
     41 		    "Show only the result with page faults before memcpy()"),
     42 	OPT_BOOLEAN('n', "no-prefault", &no_prefault,
     43 		    "Show only the result without page faults before memcpy()"),
     44 	OPT_END()
     45 };
     46 
     47 typedef void *(*memcpy_t)(void *, const void *, size_t);
     48 
     49 struct routine {
     50 	const char *name;
     51 	const char *desc;
     52 	memcpy_t fn;
     53 };
     54 
     55 struct routine routines[] = {
     56 	{ "default",
     57 	  "Default memcpy() provided by glibc",
     58 	  memcpy },
     59 #ifdef ARCH_X86_64
     60 
     61 #define MEMCPY_FN(fn, name, desc) { name, desc, fn },
     62 #include "mem-memcpy-x86-64-asm-def.h"
     63 #undef MEMCPY_FN
     64 
     65 #endif
     66 
     67 	{ NULL,
     68 	  NULL,
     69 	  NULL   }
     70 };
     71 
     72 static const char * const bench_mem_memcpy_usage[] = {
     73 	"perf bench mem memcpy <options>",
     74 	NULL
     75 };
     76 
     77 static struct perf_event_attr clock_attr = {
     78 	.type		= PERF_TYPE_HARDWARE,
     79 	.config		= PERF_COUNT_HW_CPU_CYCLES
     80 };
     81 
     82 static void init_clock(void)
     83 {
     84 	clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
     85 
     86 	if (clock_fd < 0 && errno == ENOSYS)
     87 		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
     88 	else
     89 		BUG_ON(clock_fd < 0);
     90 }
     91 
     92 static u64 get_clock(void)
     93 {
     94 	int ret;
     95 	u64 clk;
     96 
     97 	ret = read(clock_fd, &clk, sizeof(u64));
     98 	BUG_ON(ret != sizeof(u64));
     99 
    100 	return clk;
    101 }
    102 
    103 static double timeval2double(struct timeval *ts)
    104 {
    105 	return (double)ts->tv_sec +
    106 		(double)ts->tv_usec / (double)1000000;
    107 }
    108 
    109 static void alloc_mem(void **dst, void **src, size_t length)
    110 {
    111 	*dst = zalloc(length);
    112 	if (!dst)
    113 		die("memory allocation failed - maybe length is too large?\n");
    114 
    115 	*src = zalloc(length);
    116 	if (!src)
    117 		die("memory allocation failed - maybe length is too large?\n");
    118 }
    119 
    120 static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
    121 {
    122 	u64 clock_start = 0ULL, clock_end = 0ULL;
    123 	void *src = NULL, *dst = NULL;
    124 
    125 	alloc_mem(&src, &dst, len);
    126 
    127 	if (prefault)
    128 		fn(dst, src, len);
    129 
    130 	clock_start = get_clock();
    131 	fn(dst, src, len);
    132 	clock_end = get_clock();
    133 
    134 	free(src);
    135 	free(dst);
    136 	return clock_end - clock_start;
    137 }
    138 
    139 static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
    140 {
    141 	struct timeval tv_start, tv_end, tv_diff;
    142 	void *src = NULL, *dst = NULL;
    143 
    144 	alloc_mem(&src, &dst, len);
    145 
    146 	if (prefault)
    147 		fn(dst, src, len);
    148 
    149 	gettimeofday(&tv_start, NULL);
    150 	fn(dst, src, len);
    151 	gettimeofday(&tv_end, NULL);
    152 
    153 	timersub(&tv_end, &tv_start, &tv_diff);
    154 
    155 	free(src);
    156 	free(dst);
    157 	return (double)((double)len / timeval2double(&tv_diff));
    158 }
    159 
    160 #define pf (no_prefault ? 0 : 1)
    161 
    162 #define print_bps(x) do {					\
    163 		if (x < K)					\
    164 			printf(" %14lf B/Sec", x);		\
    165 		else if (x < K * K)				\
    166 			printf(" %14lfd KB/Sec", x / K);	\
    167 		else if (x < K * K * K)				\
    168 			printf(" %14lf MB/Sec", x / K / K);	\
    169 		else						\
    170 			printf(" %14lf GB/Sec", x / K / K / K); \
    171 	} while (0)
    172 
    173 int bench_mem_memcpy(int argc, const char **argv,
    174 		     const char *prefix __used)
    175 {
    176 	int i;
    177 	size_t len;
    178 	double result_bps[2];
    179 	u64 result_clock[2];
    180 
    181 	argc = parse_options(argc, argv, options,
    182 			     bench_mem_memcpy_usage, 0);
    183 
    184 	if (use_clock)
    185 		init_clock();
    186 
    187 	len = (size_t)perf_atoll((char *)length_str);
    188 
    189 	result_clock[0] = result_clock[1] = 0ULL;
    190 	result_bps[0] = result_bps[1] = 0.0;
    191 
    192 	if ((s64)len <= 0) {
    193 		fprintf(stderr, "Invalid length:%s\n", length_str);
    194 		return 1;
    195 	}
    196 
    197 	/* same to without specifying either of prefault and no-prefault */
    198 	if (only_prefault && no_prefault)
    199 		only_prefault = no_prefault = false;
    200 
    201 	for (i = 0; routines[i].name; i++) {
    202 		if (!strcmp(routines[i].name, routine))
    203 			break;
    204 	}
    205 	if (!routines[i].name) {
    206 		printf("Unknown routine:%s\n", routine);
    207 		printf("Available routines...\n");
    208 		for (i = 0; routines[i].name; i++) {
    209 			printf("\t%s ... %s\n",
    210 			       routines[i].name, routines[i].desc);
    211 		}
    212 		return 1;
    213 	}
    214 
    215 	if (bench_format == BENCH_FORMAT_DEFAULT)
    216 		printf("# Copying %s Bytes ...\n\n", length_str);
    217 
    218 	if (!only_prefault && !no_prefault) {
    219 		/* show both of results */
    220 		if (use_clock) {
    221 			result_clock[0] =
    222 				do_memcpy_clock(routines[i].fn, len, false);
    223 			result_clock[1] =
    224 				do_memcpy_clock(routines[i].fn, len, true);
    225 		} else {
    226 			result_bps[0] =
    227 				do_memcpy_gettimeofday(routines[i].fn,
    228 						len, false);
    229 			result_bps[1] =
    230 				do_memcpy_gettimeofday(routines[i].fn,
    231 						len, true);
    232 		}
    233 	} else {
    234 		if (use_clock) {
    235 			result_clock[pf] =
    236 				do_memcpy_clock(routines[i].fn,
    237 						len, only_prefault);
    238 		} else {
    239 			result_bps[pf] =
    240 				do_memcpy_gettimeofday(routines[i].fn,
    241 						len, only_prefault);
    242 		}
    243 	}
    244 
    245 	switch (bench_format) {
    246 	case BENCH_FORMAT_DEFAULT:
    247 		if (!only_prefault && !no_prefault) {
    248 			if (use_clock) {
    249 				printf(" %14lf Clock/Byte\n",
    250 					(double)result_clock[0]
    251 					/ (double)len);
    252 				printf(" %14lf Clock/Byte (with prefault)\n",
    253 					(double)result_clock[1]
    254 					/ (double)len);
    255 			} else {
    256 				print_bps(result_bps[0]);
    257 				printf("\n");
    258 				print_bps(result_bps[1]);
    259 				printf(" (with prefault)\n");
    260 			}
    261 		} else {
    262 			if (use_clock) {
    263 				printf(" %14lf Clock/Byte",
    264 					(double)result_clock[pf]
    265 					/ (double)len);
    266 			} else
    267 				print_bps(result_bps[pf]);
    268 
    269 			printf("%s\n", only_prefault ? " (with prefault)" : "");
    270 		}
    271 		break;
    272 	case BENCH_FORMAT_SIMPLE:
    273 		if (!only_prefault && !no_prefault) {
    274 			if (use_clock) {
    275 				printf("%lf %lf\n",
    276 					(double)result_clock[0] / (double)len,
    277 					(double)result_clock[1] / (double)len);
    278 			} else {
    279 				printf("%lf %lf\n",
    280 					result_bps[0], result_bps[1]);
    281 			}
    282 		} else {
    283 			if (use_clock) {
    284 				printf("%lf\n", (double)result_clock[pf]
    285 					/ (double)len);
    286 			} else
    287 				printf("%lf\n", result_bps[pf]);
    288 		}
    289 		break;
    290 	default:
    291 		/* reaching this means there's some disaster: */
    292 		die("unknown format: %d\n", bench_format);
    293 		break;
    294 	}
    295 
    296 	return 0;
    297 }
    298