Home | History | Annotate | Download | only in bench
      1 /*
      2  * mem-memcpy.c
      3  *
      4  * memcpy: Simple memory copy in various ways
      5  *
      6  * Written by Hitoshi Mitake <mitake (at) dcl.info.waseda.ac.jp>
      7  */
      8 
      9 #include "../perf.h"
     10 #include "../util/util.h"
     11 #include "../util/parse-options.h"
     12 #include "../util/header.h"
     13 #include "bench.h"
     14 #include "mem-memcpy-arch.h"
     15 
     16 #include <stdio.h>
     17 #include <stdlib.h>
     18 #include <string.h>
     19 #include <sys/time.h>
     20 #include <errno.h>
     21 
/* Binary scaling step (1024) used by print_bps() to pick B/KB/MB/GB units. */
#define K 1024

/* Value of -l/--length; converted to a byte count with perf_atoll(). */
static const char	*length_str	= "1MB";
/* Value of -r/--routine; matched against routines[].name. */
static const char	*routine	= "default";
/* Value of -i/--iterations: how many times the timed copy is repeated. */
static int		iterations	= 1;
/* -c/--cycle: measure with the cycles event instead of gettimeofday(). */
static bool		use_cycle;
/* Descriptor opened by init_cycle() and read by get_cycle(). */
static int		cycle_fd;
/* -o/--only-prefault: report only the run preceded by a prefault copy. */
static bool		only_prefault;
/* -n/--no-prefault: report only the run without a prefault copy. */
static bool		no_prefault;
     31 
/*
 * Command-line options of the memcpy benchmark. The table is handed to
 * parse_options() in bench_mem_memcpy() and is terminated by OPT_END().
 * Each entry stores its value in the file-scope variable it points at.
 */
static const struct option options[] = {
	OPT_STRING('l', "length", &length_str, "1MB",
		    "Specify length of memory to copy. "
		    "Available units: B, KB, MB, GB and TB (upper and lower)"),
	OPT_STRING('r', "routine", &routine, "default",
		    "Specify routine to copy"),
	OPT_INTEGER('i', "iterations", &iterations,
		    "repeat memcpy() invocation this number of times"),
	OPT_BOOLEAN('c', "cycle", &use_cycle,
		    "Use cycles event instead of gettimeofday() for measuring"),
	OPT_BOOLEAN('o', "only-prefault", &only_prefault,
		    "Show only the result with page faults before memcpy()"),
	OPT_BOOLEAN('n', "no-prefault", &no_prefault,
		    "Show only the result without page faults before memcpy()"),
	OPT_END()
};
     48 
/* Signature shared by every benchmarked copy routine; same as memcpy(). */
typedef void *(*memcpy_t)(void *, const void *, size_t);

/* One selectable copy implementation. */
struct routine {
	const char *name;	/* identifier compared with the -r/--routine value */
	const char *desc;	/* description printed when listing available routines */
	memcpy_t fn;		/* function that performs the copy */
};
     56 
/*
 * Table of copy routines selectable with -r/--routine. Lookups walk the
 * array until they reach the entry whose name is NULL, so the all-NULL
 * sentinel must stay last.
 */
struct routine routines[] = {
	{ "default",
	  "Default memcpy() provided by glibc",
	  memcpy },
#ifdef ARCH_X86_64

/*
 * MEMCPY_FN() reorders its arguments into a struct routine initializer.
 * NOTE(review): presumably the included header is a list of MEMCPY_FN()
 * invocations, one per x86-64 assembly routine - confirm against the header.
 */
#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
#include "mem-memcpy-x86-64-asm-def.h"
#undef MEMCPY_FN

#endif

	/* end-of-table sentinel */
	{ NULL,
	  NULL,
	  NULL   }
};
     73 
/* NULL-terminated usage strings passed to parse_options(). */
static const char * const bench_mem_memcpy_usage[] = {
	"perf bench mem memcpy <options>",
	NULL
};
     78 
/*
 * Event description handed to sys_perf_event_open() by init_cycle():
 * the hardware CPU-cycles counter. All other fields are zero-initialized.
 */
static struct perf_event_attr cycle_attr = {
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES
};
     83 
     84 static void init_cycle(void)
     85 {
     86 	cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0);
     87 
     88 	if (cycle_fd < 0 && errno == ENOSYS)
     89 		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
     90 	else
     91 		BUG_ON(cycle_fd < 0);
     92 }
     93 
     94 static u64 get_cycle(void)
     95 {
     96 	int ret;
     97 	u64 clk;
     98 
     99 	ret = read(cycle_fd, &clk, sizeof(u64));
    100 	BUG_ON(ret != sizeof(u64));
    101 
    102 	return clk;
    103 }
    104 
    105 static double timeval2double(struct timeval *ts)
    106 {
    107 	return (double)ts->tv_sec +
    108 		(double)ts->tv_usec / (double)1000000;
    109 }
    110 
    111 static void alloc_mem(void **dst, void **src, size_t length)
    112 {
    113 	*dst = zalloc(length);
    114 	if (!*dst)
    115 		die("memory allocation failed - maybe length is too large?\n");
    116 
    117 	*src = zalloc(length);
    118 	if (!*src)
    119 		die("memory allocation failed - maybe length is too large?\n");
    120 	/* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */
    121 	memset(*src, 0, length);
    122 }
    123 
    124 static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)
    125 {
    126 	u64 cycle_start = 0ULL, cycle_end = 0ULL;
    127 	void *src = NULL, *dst = NULL;
    128 	int i;
    129 
    130 	alloc_mem(&src, &dst, len);
    131 
    132 	if (prefault)
    133 		fn(dst, src, len);
    134 
    135 	cycle_start = get_cycle();
    136 	for (i = 0; i < iterations; ++i)
    137 		fn(dst, src, len);
    138 	cycle_end = get_cycle();
    139 
    140 	free(src);
    141 	free(dst);
    142 	return cycle_end - cycle_start;
    143 }
    144 
    145 static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
    146 {
    147 	struct timeval tv_start, tv_end, tv_diff;
    148 	void *src = NULL, *dst = NULL;
    149 	int i;
    150 
    151 	alloc_mem(&src, &dst, len);
    152 
    153 	if (prefault)
    154 		fn(dst, src, len);
    155 
    156 	BUG_ON(gettimeofday(&tv_start, NULL));
    157 	for (i = 0; i < iterations; ++i)
    158 		fn(dst, src, len);
    159 	BUG_ON(gettimeofday(&tv_end, NULL));
    160 
    161 	timersub(&tv_end, &tv_start, &tv_diff);
    162 
    163 	free(src);
    164 	free(dst);
    165 	return (double)((double)len / timeval2double(&tv_diff));
    166 }
    167 
    168 #define pf (no_prefault ? 0 : 1)
    169 
    170 #define print_bps(x) do {					\
    171 		if (x < K)					\
    172 			printf(" %14lf B/Sec", x);		\
    173 		else if (x < K * K)				\
    174 			printf(" %14lfd KB/Sec", x / K);	\
    175 		else if (x < K * K * K)				\
    176 			printf(" %14lf MB/Sec", x / K / K);	\
    177 		else						\
    178 			printf(" %14lf GB/Sec", x / K / K / K); \
    179 	} while (0)
    180 
    181 int bench_mem_memcpy(int argc, const char **argv,
    182 		     const char *prefix __maybe_unused)
    183 {
    184 	int i;
    185 	size_t len;
    186 	double result_bps[2];
    187 	u64 result_cycle[2];
    188 
    189 	argc = parse_options(argc, argv, options,
    190 			     bench_mem_memcpy_usage, 0);
    191 
    192 	if (use_cycle)
    193 		init_cycle();
    194 
    195 	len = (size_t)perf_atoll((char *)length_str);
    196 
    197 	result_cycle[0] = result_cycle[1] = 0ULL;
    198 	result_bps[0] = result_bps[1] = 0.0;
    199 
    200 	if ((s64)len <= 0) {
    201 		fprintf(stderr, "Invalid length:%s\n", length_str);
    202 		return 1;
    203 	}
    204 
    205 	/* same to without specifying either of prefault and no-prefault */
    206 	if (only_prefault && no_prefault)
    207 		only_prefault = no_prefault = false;
    208 
    209 	for (i = 0; routines[i].name; i++) {
    210 		if (!strcmp(routines[i].name, routine))
    211 			break;
    212 	}
    213 	if (!routines[i].name) {
    214 		printf("Unknown routine:%s\n", routine);
    215 		printf("Available routines...\n");
    216 		for (i = 0; routines[i].name; i++) {
    217 			printf("\t%s ... %s\n",
    218 			       routines[i].name, routines[i].desc);
    219 		}
    220 		return 1;
    221 	}
    222 
    223 	if (bench_format == BENCH_FORMAT_DEFAULT)
    224 		printf("# Copying %s Bytes ...\n\n", length_str);
    225 
    226 	if (!only_prefault && !no_prefault) {
    227 		/* show both of results */
    228 		if (use_cycle) {
    229 			result_cycle[0] =
    230 				do_memcpy_cycle(routines[i].fn, len, false);
    231 			result_cycle[1] =
    232 				do_memcpy_cycle(routines[i].fn, len, true);
    233 		} else {
    234 			result_bps[0] =
    235 				do_memcpy_gettimeofday(routines[i].fn,
    236 						len, false);
    237 			result_bps[1] =
    238 				do_memcpy_gettimeofday(routines[i].fn,
    239 						len, true);
    240 		}
    241 	} else {
    242 		if (use_cycle) {
    243 			result_cycle[pf] =
    244 				do_memcpy_cycle(routines[i].fn,
    245 						len, only_prefault);
    246 		} else {
    247 			result_bps[pf] =
    248 				do_memcpy_gettimeofday(routines[i].fn,
    249 						len, only_prefault);
    250 		}
    251 	}
    252 
    253 	switch (bench_format) {
    254 	case BENCH_FORMAT_DEFAULT:
    255 		if (!only_prefault && !no_prefault) {
    256 			if (use_cycle) {
    257 				printf(" %14lf Cycle/Byte\n",
    258 					(double)result_cycle[0]
    259 					/ (double)len);
    260 				printf(" %14lf Cycle/Byte (with prefault)\n",
    261 					(double)result_cycle[1]
    262 					/ (double)len);
    263 			} else {
    264 				print_bps(result_bps[0]);
    265 				printf("\n");
    266 				print_bps(result_bps[1]);
    267 				printf(" (with prefault)\n");
    268 			}
    269 		} else {
    270 			if (use_cycle) {
    271 				printf(" %14lf Cycle/Byte",
    272 					(double)result_cycle[pf]
    273 					/ (double)len);
    274 			} else
    275 				print_bps(result_bps[pf]);
    276 
    277 			printf("%s\n", only_prefault ? " (with prefault)" : "");
    278 		}
    279 		break;
    280 	case BENCH_FORMAT_SIMPLE:
    281 		if (!only_prefault && !no_prefault) {
    282 			if (use_cycle) {
    283 				printf("%lf %lf\n",
    284 					(double)result_cycle[0] / (double)len,
    285 					(double)result_cycle[1] / (double)len);
    286 			} else {
    287 				printf("%lf %lf\n",
    288 					result_bps[0], result_bps[1]);
    289 			}
    290 		} else {
    291 			if (use_cycle) {
    292 				printf("%lf\n", (double)result_cycle[pf]
    293 					/ (double)len);
    294 			} else
    295 				printf("%lf\n", result_bps[pf]);
    296 		}
    297 		break;
    298 	default:
    299 		/* reaching this means there's some disaster: */
    300 		die("unknown format: %d\n", bench_format);
    301 		break;
    302 	}
    303 
    304 	return 0;
    305 }
    306