// This artificial program runs a lot of code.  The exact amount depends on
// the command line -- if an arg "0" is given, it does exactly
// the same amount of work, but using four times as much code.
// If an arg >= 1 is given, the amount of code is multiplied by this arg.
//
// It's a stress test for Valgrind's translation speed;  natively the two
// modes run in about the same time (the I-cache effects aren't big enough
// to make a difference), but under Valgrind the one running more code is
// significantly slower due to the extra translation time.

// 31 Aug 2015: this only "works" on x86/amd64/s390 by accident; the
// test is essentially kludged.  This "generates" code into memory
// (the mmap'd area) and then executes it.  But historically and even
// after this commit (r15601), the test has been run without
// --smc-check=all or all-non-file.  That just happens to work because
// the "generated" code is never modified, so there's never a
// translated-vs-reality coherence problem.  Really we ought to run
// with the new-as-of-r15601 default --smc-check=all-non-file, but that
// hugely slows it down and makes the results non-comparable with
// pre r15601 results, so instead the .vgperf files now specify the
// old default value --smc-check=stack explicitly.
22 23 24 #include <stdio.h> 25 #include <string.h> 26 #include <stdlib.h> 27 #include <assert.h> 28 #if defined(__mips__) 29 #include <asm/cachectl.h> 30 #include <sys/syscall.h> 31 #elif defined(__tilegx__) 32 #include <asm/cachectl.h> 33 #endif 34 #include "tests/sys_mman.h" 35 36 #define FN_SIZE 1280 // Must be big enough to hold the compiled f() 37 // and any literal pool that might be used 38 #define N_LOOPS 20000 // Should be divisible by four 39 #define RATIO 4 // Ratio of code sizes between the two modes 40 41 int f(int x, int y) 42 { 43 int i; 44 for (i = 0; i < 5000; i++) { 45 switch (x % 8) { 46 case 1: y += 3; 47 case 2: y += x; 48 case 3: y *= 2; 49 default: y--; 50 } 51 } 52 return y; 53 } 54 55 int main(int argc, char* argv[]) 56 { 57 int h, i, sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0; 58 int n_fns, n_reps; 59 60 if (argc <= 1) { 61 // Mode 1: not so much code 62 n_fns = N_LOOPS / RATIO; 63 n_reps = RATIO; 64 printf("mode 1: "); 65 } else { 66 // Mode 2: lots of code 67 const int mul = atoi(argv[1]); 68 if (mul == 0) 69 n_fns = N_LOOPS; 70 else 71 n_fns = N_LOOPS * mul; 72 n_reps = 1; 73 printf("mode 1: "); 74 } 75 printf("%d copies of f(), %d reps\n", n_fns, n_reps); 76 77 char* a = mmap(0, FN_SIZE * n_fns, 78 PROT_EXEC|PROT_WRITE, 79 MAP_PRIVATE|MAP_ANONYMOUS, -1,0); 80 assert(a != (char*)MAP_FAILED); 81 82 // Make a whole lot of copies of f(). FN_SIZE is much bigger than f() 83 // will ever be (we hope). 
84 for (i = 0; i < n_fns; i++) { 85 memcpy(&a[FN_SIZE*i], f, FN_SIZE); 86 } 87 88 #if defined(__mips__) 89 syscall(__NR_cacheflush, a, FN_SIZE * n_fns, ICACHE); 90 #elif defined(__tilegx__) 91 cacheflush(a, FN_SIZE * n_fns, ICACHE); 92 #endif 93 94 for (h = 0; h < n_reps; h += 1) { 95 for (i = 0; i < n_fns; i += 4) { 96 int(*f1)(int,int) = (void*)&a[FN_SIZE*(i+0)]; 97 int(*f2)(int,int) = (void*)&a[FN_SIZE*(i+1)]; 98 int(*f3)(int,int) = (void*)&a[FN_SIZE*(i+2)]; 99 int(*f4)(int,int) = (void*)&a[FN_SIZE*(i+3)]; 100 sum1 += f1(i+0, n_fns-i+0); 101 sum2 += f2(i+1, n_fns-i+1); 102 sum3 += f3(i+2, n_fns-i+2); 103 sum4 += f4(i+3, n_fns-i+3); 104 if (i % 1000 == 0) 105 printf("."); 106 } 107 } 108 printf("result = %d\n", sum1 + sum2 + sum3 + sum4); 109 return 0; 110 } 111