1 /******************************************************************************/ 2 /* */ 3 /* Copyright (s) Ying Han <yinghan (at) google.com>, 2009 */ 4 /* */ 5 /* This program is free software; you can redistribute it and/or modify */ 6 /* it under the terms of the GNU General Public License as published by */ 7 /* the Free Software Foundation; either version 2 of the License, or */ 8 /* (at your option) any later version. */ 9 /* */ 10 /* This program is distributed in the hope that it will be useful, */ 11 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ 12 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See */ 13 /* the GNU General Public License for more details. */ 14 /* */ 15 /* You should have received a copy of the GNU General Public License */ 16 /* along with this program; if not, write to the Free Software */ 17 /* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ 18 /* */ 19 /******************************************************************************/ 20 /* 21 ftruncate-mmap: pages are lost after writing to mmaped file, 22 23 We triggered the failure during some internal experiment with 24 ftruncate/mmap/write/read sequence. And we found that some pages are 25 "lost" after writing to the mmaped file. which in the following test 26 cases (count >= 0). 27 28 First we deployed the test cases into group of machines and see about 29 >20% failure rate on average. Then, I did couple of experiment to try 30 to reproduce it on a single machine. what i found is that: 31 1. add a fsync after write the file, i can not reproduce this issue. 32 2. add memory pressure(mmap/mlock) while run the test in infinite 33 loop, the failure is reproduced quickly. ( background flushing ? ) 34 35 The "bad pages" count differs each time from one digit to 4,5 digit 36 for 128M ftruncated file. and what i also found that the bad page 37 number are contiguous for each segment which total bad pages container 38 several segments. ext "1-4, 9-20, 48-50" ( batch flushing ? ) 39 40 (The failure is reproduced based on 2.6.29-rc8, also happened on 41 2.6.18 kernel. . Here is the simple test case to reproduce it with 42 memory pressure. ) 43 */ 44 45 #include <sys/mman.h> 46 #include <sys/types.h> 47 #include <fcntl.h> 48 #include <unistd.h> 49 #include <stdio.h> 50 #include <stdlib.h> 51 #include <string.h> 52 #include <signal.h> 53 54 #include "test.h" 55 56 /* Extern Global Variables */ 57 extern int tst_count; 58 59 /* Global Variables */ 60 char *TCID = "mmap-corruption01"; /* test program identifier. */ 61 int TST_TOTAL = 1; /* total number of tests in this file. */ 62 63 long kMemSize = 128 << 20; 64 int kPageSize = 4096; 65 66 char *usage = "-h hours -m minutes -s secs\n"; 67 68 int anyfail(void) 69 { 70 tst_brkm(TFAIL, tst_rmdir, "Test failed\n"); 71 } 72 73 int main(int argc, char **argv) 74 { 75 char *progname; 76 int count = 0; 77 int i, c; 78 char *fname = "test.mmap-corruption"; 79 char *mem; 80 unsigned long alarmtime = 0; 81 struct sigaction sa; 82 void finish(int sig); 83 84 progname = *argv; 85 while ((c = getopt(argc, argv, ":h:m:s:")) != -1) { 86 switch (c) { 87 case 'h': 88 alarmtime += atoi(optarg) * 60 * 60; 89 break; 90 case 'm': 91 alarmtime += atoi(optarg) * 60; 92 break; 93 case 's': 94 alarmtime += atoi(optarg); 95 break; 96 default: 97 (void)fprintf(stderr, "usage: %s %s\n", progname, 98 usage); 99 anyfail(); 100 } 101 } 102 103 /* 104 * Plan for death by signal. User may have specified 105 * a time limit, in which case set an alarm and catch SIGALRM. 106 * Also catch and cleanup with SIGINT, SIGQUIT, and SIGTERM. 107 */ 108 sa.sa_handler = finish; 109 sa.sa_flags = 0; 110 if (sigemptyset(&sa.sa_mask)) { 111 perror("sigempty error"); 112 exit(1); 113 } 114 115 if (sigaction(SIGINT, &sa, 0) == -1) { 116 perror("sigaction error SIGINT"); 117 exit(1); 118 } 119 if (alarmtime) { 120 if (sigaction(SIGALRM, &sa, 0) == -1) { 121 perror("sigaction error"); 122 exit(1); 123 } 124 (void)alarm(alarmtime); 125 printf("mmap-corruption will run for=> %ld, seconds\n", 126 alarmtime); 127 } else { //Run for 5 secs only 128 if (sigaction(SIGALRM, &sa, 0) == -1) { 129 perror("sigaction error"); 130 exit(1); 131 } 132 (void)alarm(5); 133 printf("mmap-corruption will run for=> 5, seconds\n"); 134 } 135 /* If we get a SIGQUIT or SIGTERM, clean up and exit immediately. */ 136 sa.sa_handler = finish; 137 if (sigaction(SIGQUIT, &sa, 0) == -1) { 138 perror("sigaction error SIGQUIT"); 139 exit(1); 140 } 141 if (sigaction(SIGTERM, &sa, 0) == -1) { 142 perror("sigaction error SIGTERM"); 143 exit(1); 144 } 145 146 tst_tmpdir(); 147 while (1) { 148 unlink(fname); 149 int fd = open(fname, O_CREAT | O_EXCL | O_RDWR, 0600); 150 ftruncate(fd, kMemSize); 151 152 mem = 153 mmap(0, kMemSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 154 0); 155 // Fill the memory with 1s. 156 memset(mem, 1, kMemSize); 157 158 for (i = 0; i < kMemSize; i++) { 159 int byte_good = mem[i] != 0; 160 if (!byte_good && ((i % kPageSize) == 0)) { 161 //printf("%d ", i / kPageSize); 162 count++; 163 } 164 } 165 munmap(mem, kMemSize); 166 close(fd); 167 unlink(fname); 168 if (count > 0) { 169 printf("Running %d bad page\n", count); 170 return 1; 171 } 172 count = 0; 173 } 174 return 0; 175 } 176 177 void finish(int sig) 178 { 179 printf("mmap-corruption PASSED\n"); 180 exit(0); 181 } 182