// This artificial program runs a lot of code.  The exact amount depends on
// the command line -- if an arg "0" is given, it does exactly
// the same amount of work, but using four times as much code.
// If an arg >= 1 is given, the amount of code is multiplied by this arg.
//
// It's a stress test for Valgrind's translation speed;  natively the two
// modes run in about the same time (the I-cache effects aren't big enough
// to make a difference), but under Valgrind the one running more code is
// significantly slower due to the extra translation time.

// 31 Aug 2015: this only "works" on x86/amd64/s390 by accident; the
// test is essentially kludged.  This "generates" code into memory
// (the mmap'd area) and then executes it.  But historically and even
// after this commit (r15601), the test has been run without
// --smc-check=all or all-non-file.  That just happens to work because
// the "generated" code is never modified, so there's never a
// translated-vs-reality coherence problem.  Really we ought to run
// with the new-as-of-r15601 default --smc-check=all-non-file, but that
// hugely slows it down and makes the results non-comparable with
// pre r15601 results, so instead the .vgperf files now specify the
// old default value --smc-check=stack explicitly.
22 23 24 #include <stdio.h> 25 #include <string.h> 26 #include <stdlib.h> 27 #include <assert.h> 28 #if defined(__mips__) 29 #include <asm/cachectl.h> 30 #include <sys/syscall.h> 31 #endif 32 #include "tests/sys_mman.h" 33 34 #define FN_SIZE 1280 // Must be big enough to hold the compiled f() 35 // and any literal pool that might be used 36 #define N_LOOPS 20000 // Should be divisible by four 37 #define RATIO 4 // Ratio of code sizes between the two modes 38 39 int f(int x, int y) 40 { 41 int i; 42 for (i = 0; i < 5000; i++) { 43 switch (x % 8) { 44 case 1: y += 3; 45 case 2: y += x; 46 case 3: y *= 2; 47 default: y--; 48 } 49 } 50 return y; 51 } 52 53 int main(int argc, char* argv[]) 54 { 55 int h, i, sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0; 56 int n_fns, n_reps; 57 58 if (argc <= 1) { 59 // Mode 1: not so much code 60 n_fns = N_LOOPS / RATIO; 61 n_reps = RATIO; 62 printf("mode 1: "); 63 } else { 64 // Mode 2: lots of code 65 const int mul = atoi(argv[1]); 66 if (mul == 0) 67 n_fns = N_LOOPS; 68 else 69 n_fns = N_LOOPS * mul; 70 n_reps = 1; 71 printf("mode 1: "); 72 } 73 printf("%d copies of f(), %d reps\n", n_fns, n_reps); 74 75 char* a = mmap(0, FN_SIZE * n_fns, 76 PROT_EXEC|PROT_WRITE|PROT_READ, 77 MAP_PRIVATE|MAP_ANONYMOUS, -1,0); 78 assert(a != (char*)MAP_FAILED); 79 80 // Make a whole lot of copies of f(). FN_SIZE is much bigger than f() 81 // will ever be (we hope). 
82 for (i = 0; i < n_fns; i++) { 83 memcpy(&a[FN_SIZE*i], f, FN_SIZE); 84 } 85 86 #if defined(__mips__) 87 syscall(__NR_cacheflush, a, FN_SIZE * n_fns, ICACHE); 88 #endif 89 90 for (h = 0; h < n_reps; h += 1) { 91 for (i = 0; i < n_fns; i += 4) { 92 int(*f1)(int,int) = (void*)&a[FN_SIZE*(i+0)]; 93 int(*f2)(int,int) = (void*)&a[FN_SIZE*(i+1)]; 94 int(*f3)(int,int) = (void*)&a[FN_SIZE*(i+2)]; 95 int(*f4)(int,int) = (void*)&a[FN_SIZE*(i+3)]; 96 sum1 += f1(i+0, n_fns-i+0); 97 sum2 += f2(i+1, n_fns-i+1); 98 sum3 += f3(i+2, n_fns-i+2); 99 sum4 += f4(i+3, n_fns-i+3); 100 if (i % 1000 == 0) 101 printf("."); 102 } 103 } 104 printf("result = %d\n", sum1 + sum2 + sum3 + sum4); 105 return 0; 106 } 107