1 /** 2 * @file op_syscalls.c 3 * Tracing of system calls 4 * 5 * @remark Copyright 2002 OProfile authors 6 * @remark Read the file COPYING 7 * 8 * @author Bob Montgomery 9 * @author Will Cohen 10 * @author John Levon 11 * @author Philippe Elie 12 */ 13 14 #include <linux/sched.h> 15 #include <linux/unistd.h> 16 #include <linux/mman.h> 17 #include <linux/file.h> 18 19 #include "oprofile.h" 20 #include "op_dcache.h" 21 #include "op_util.h" 22 23 uint dname_top; 24 struct qstr **dname_stack; 25 char * pool_pos; 26 char * pool_start; 27 char * pool_end; 28 29 void oprof_put_note(struct op_note * samp); 30 31 /* ------------ system calls --------------- */ 32 33 struct mmap_arg_struct { 34 unsigned long addr; 35 unsigned long len; 36 unsigned long prot; 37 unsigned long flags; 38 unsigned long fd; 39 unsigned long offset; 40 }; 41 42 /* --------- IA64 versions of system calls ------ */ 43 asmlinkage static int (*old_sys_clone)(long, long); 44 asmlinkage static int (*old_sys_clone2)(long, long, long); 45 asmlinkage static int (*old_sys_execve)(char *, char **, char **); 46 asmlinkage static unsigned long (*old_sys_mmap)(unsigned long, 47 unsigned long, int, int, int, long); 48 asmlinkage static unsigned long (*old_sys_mmap2)(unsigned long, 49 unsigned long, int, int, int, long); 50 asmlinkage static long (*old_sys_init_module)(char const *, struct module *); 51 asmlinkage static long (*old_sys_exit)(int); 52 53 /* --------- declarations of interception stubs for IA64 ------ */ 54 asmlinkage long post_stub_clone(long, long); 55 asmlinkage long post_stub_clone2(long, long, long); 56 asmlinkage long my_ia64_execve(char *, char **, char **); 57 asmlinkage unsigned long post_stub_mmap(unsigned long, 58 unsigned long, int, int, int, long); 59 asmlinkage unsigned long post_stub_mmap2(unsigned long, 60 unsigned long, int, int, int, long); 61 asmlinkage long post_stub_init_module(char const *, struct module *); 62 asmlinkage long pre_stub_exit(int); 63 64 /* IA64 system call table doesn't use function pointers, it uses 65 * pointers to code (not the same thing). Basically it can violate the 66 * procedure calling rules because these "procedure calls" are made by 67 * the assembly language BREAK handler in ivt.S. 68 */ 69 70 struct fdesc { 71 void * ip; 72 void * gp; 73 }; 74 75 struct fdesc fdesc_clone; 76 struct fdesc fdesc_clone2; 77 struct fdesc fdesc_execve; 78 struct fdesc fdesc_mmap; 79 struct fdesc fdesc_mmap2; 80 struct fdesc fdesc_init_module; 81 struct fdesc fdesc_exit; 82 /* ----------- End of IA64 weirdness for now -------------- */ 83 84 spinlock_t map_lock = SPIN_LOCK_UNLOCKED; 85 86 /* called with map_lock held */ 87 static void oprof_output_map(ulong addr, ulong len, 88 ulong offset, struct file * file, int is_execve) 89 { 90 struct op_note note; 91 92 /* don't bother with /dev/zero mappings etc. */ 93 if (!len) 94 return; 95 96 note.pid = current->pid; 97 note.tgid = op_get_tgid(); 98 note.addr = addr; 99 note.len = len; 100 note.offset = offset; 101 note.type = is_execve ? OP_EXEC : OP_MAP; 102 note.hash = hash_path(file); 103 if (note.hash == -1) 104 return; 105 oprof_put_note(¬e); 106 } 107 108 static int oprof_output_maps(struct task_struct * task) 109 { 110 int size=0; 111 struct mm_struct * mm; 112 struct vm_area_struct * map; 113 114 /* we don't need to worry about mm_users here, since there is at 115 least one user (current), and if there's other code using this 116 mm, then mm_users must be at least 2; we should never have to 117 mmput() here. */ 118 119 if (!(mm = task->mm)) 120 goto out; 121 122 lock_mmap(mm); 123 spin_lock(&map_lock); 124 125 /* We need two pass, daemon assume than the first mmap notification 126 * is for the executable but some process doesn't follow this model. 127 */ 128 for (map = mm->mmap; map; map = map->vm_next) { 129 if (!(map->vm_flags & VM_EXEC) || !map->vm_file) 130 continue; 131 if (!(map->vm_flags & VM_EXECUTABLE)) 132 continue; 133 134 oprof_output_map(map->vm_start, map->vm_end-map->vm_start, 135 GET_VM_OFFSET(map), map->vm_file, 1); 136 } 137 for (map = mm->mmap; map; map = map->vm_next) { 138 if (!(map->vm_flags & VM_EXEC) || !map->vm_file) 139 continue; 140 if (map->vm_flags & VM_EXECUTABLE) 141 continue; 142 143 oprof_output_map(map->vm_start, map->vm_end-map->vm_start, 144 GET_VM_OFFSET(map), map->vm_file, 0); 145 } 146 spin_unlock(&map_lock); 147 unlock_mmap(mm); 148 149 out: 150 return size; 151 } 152 153 154 /* execve is a special case on IA64. The others get the result and 155 * arguments after the system call has been made from the ASM stub. */ 156 157 asmlinkage long 158 my_sys_execve (char * filename, char **argv, char **envp, struct pt_regs * regs) 159 { 160 int error; 161 162 MOD_INC_USE_COUNT; 163 164 filename = getname(filename); 165 error = PTR_ERR(filename); 166 if (IS_ERR(filename)) 167 goto out; 168 error = do_execve(filename, argv, envp, regs); 169 170 if (!error) { 171 PTRACE_OFF(current); 172 oprof_output_maps(current); 173 } 174 putname(filename); 175 out: 176 unlock_execve(); 177 MOD_DEC_USE_COUNT; 178 return error; 179 } 180 181 182 static void out_mmap(ulong addr, ulong len, ulong prot, ulong flags, 183 ulong fd, ulong offset) 184 { 185 struct file * file; 186 187 lock_out_mmap(); 188 189 file = fget(fd); 190 if (!file) 191 goto out; 192 193 spin_lock(&map_lock); 194 oprof_output_map(addr, len, offset, file, 0); 195 spin_unlock(&map_lock); 196 197 fput(file); 198 199 out: 200 unlock_out_mmap(); 201 } 202 203 204 /* 205 * IA64 mmap routines: 206 * The post_sys_* routines are called after the syscall has been made. 207 * The first argument is the return value from the system call. 208 */ 209 asmlinkage void post_sys_mmap2(ulong ret, ulong addr, ulong len, 210 ulong prot, ulong flags, ulong fd, ulong pgoff) 211 { 212 /* FIXME: This should be done in the ASM stub. */ 213 MOD_INC_USE_COUNT; 214 215 if ((prot & PROT_EXEC) && ret >= 0) 216 out_mmap(ret, len, prot, flags, fd, pgoff << PAGE_SHIFT); 217 goto out; 218 out: 219 MOD_DEC_USE_COUNT; 220 } 221 222 asmlinkage void post_sys_mmap(ulong ret, ulong addr, ulong len, 223 ulong prot, ulong flags, ulong fd, ulong off) 224 { 225 /* FIXME: This should be done in the ASM stub. */ 226 MOD_INC_USE_COUNT; 227 228 if ((prot & PROT_EXEC) && ret >= 0) 229 out_mmap(ret, len, prot, flags, fd, off); 230 goto out; 231 out: 232 MOD_DEC_USE_COUNT; 233 } 234 235 236 inline static void oprof_report_fork(u32 old_pid, u32 new_pid, u32 old_tgid, u32 new_tgid) 237 { 238 struct op_note note; 239 240 note.type = OP_FORK; 241 note.pid = old_pid; 242 note.tgid = old_tgid; 243 note.addr = new_pid; 244 note.len = new_tgid; 245 oprof_put_note(¬e); 246 } 247 248 249 asmlinkage void post_sys_clone(long ret, long arg0, long arg1) 250 { 251 u32 pid = current->pid; 252 u32 tgid = op_get_tgid(); 253 254 /* FIXME: This should be done in the ASM stub. */ 255 MOD_INC_USE_COUNT; 256 257 if (ret) 258 /* FIXME: my libc show clone() is not implemented in ia64 259 * but used only by fork() with a SIGCHILD first parameter 260 * so we assume it's a fork */ 261 oprof_report_fork(pid, ret, pid, tgid); 262 MOD_DEC_USE_COUNT; 263 } 264 265 asmlinkage void post_sys_clone2(long ret, long arg0, long arg1, long arg2) 266 { 267 u32 pid = current->pid; 268 u32 tgid = op_get_tgid(); 269 long clone_flags = arg0; 270 271 /* FIXME: This should be done in the ASM stub. */ 272 MOD_INC_USE_COUNT; 273 274 if (ret) { 275 if (clone_flags & CLONE_THREAD) 276 oprof_report_fork(pid, ret, tgid, tgid); 277 else 278 oprof_report_fork(pid, ret, tgid, ret); 279 } 280 MOD_DEC_USE_COUNT; 281 } 282 283 asmlinkage void 284 post_sys_init_module(long ret, char const * name_user, 285 struct module * mod_user) 286 { 287 /* FIXME: This should be done in the ASM stub. */ 288 MOD_INC_USE_COUNT; 289 290 if (ret >= 0) { 291 struct op_note note; 292 293 note.type = OP_DROP_MODULES; 294 oprof_put_note(¬e); 295 } 296 MOD_DEC_USE_COUNT; 297 } 298 299 /* Exit must use a pre-call intercept stub. There is no post exit. */ 300 asmlinkage void pre_sys_exit(int error_code) 301 { 302 struct op_note note; 303 304 MOD_INC_USE_COUNT; 305 306 note.addr = current->times.tms_utime; 307 note.len = current->times.tms_stime; 308 note.offset = current->start_time; 309 note.type = OP_EXIT; 310 note.pid = current->pid; 311 note.tgid = op_get_tgid(); 312 oprof_put_note(¬e); 313 314 /* this looks UP-dangerous, as the exit sleeps and we don't 315 * have a use count, but in fact its ok as sys_exit is noreturn, 316 * so we can never come back to this non-existent exec page 317 */ 318 MOD_DEC_USE_COUNT; 319 } 320 321 extern void * sys_call_table[]; 322 323 /* FIXME: Now that I'm never trying to do a C-level call through these 324 * pointers, I should just save, intercept, and restore with void * 325 * instead of the void * part of the function descriptor, I think. 326 */ 327 328 void op_save_syscalls(void) 329 { 330 fdesc_clone.ip = sys_call_table[__NR_clone - __NR_ni_syscall]; 331 old_sys_clone = (void *)&fdesc_clone; 332 fdesc_clone2.ip = sys_call_table[__NR_clone2 - __NR_ni_syscall]; 333 old_sys_clone2 = (void *)&fdesc_clone2; 334 fdesc_execve.ip = sys_call_table[__NR_execve - __NR_ni_syscall]; 335 old_sys_execve = (void *)&fdesc_execve; 336 fdesc_mmap.ip = sys_call_table[__NR_mmap - __NR_ni_syscall]; 337 old_sys_mmap = (void *)&fdesc_mmap; 338 fdesc_mmap2.ip = sys_call_table[__NR_mmap2 - __NR_ni_syscall]; 339 old_sys_mmap2 = (void *)&fdesc_mmap2; 340 fdesc_init_module.ip = sys_call_table[__NR_init_module - __NR_ni_syscall]; 341 old_sys_init_module = (void *)&fdesc_init_module; 342 fdesc_exit.ip = sys_call_table[__NR_exit - __NR_ni_syscall]; 343 old_sys_exit = (void *)&fdesc_exit; 344 } 345 346 void op_intercept_syscalls(void) 347 { 348 /* Must extract the function address from the stub function 349 * descriptors. 350 */ 351 sys_call_table[__NR_clone - __NR_ni_syscall] = 352 ((struct fdesc *)post_stub_clone)->ip; 353 sys_call_table[__NR_clone2 - __NR_ni_syscall] = 354 ((struct fdesc *)post_stub_clone2)->ip; 355 sys_call_table[__NR_execve - __NR_ni_syscall] = 356 ((struct fdesc *)my_ia64_execve)->ip; 357 sys_call_table[__NR_mmap - __NR_ni_syscall] = 358 ((struct fdesc *)post_stub_mmap)->ip; 359 sys_call_table[__NR_mmap2 - __NR_ni_syscall] = 360 ((struct fdesc *)post_stub_mmap2)->ip; 361 sys_call_table[__NR_init_module - __NR_ni_syscall] = 362 ((struct fdesc *)post_stub_init_module)->ip; 363 sys_call_table[__NR_exit - __NR_ni_syscall] = 364 ((struct fdesc *)pre_stub_exit)->ip; 365 } 366 367 void op_restore_syscalls(void) 368 { 369 sys_call_table[__NR_clone - __NR_ni_syscall] = 370 ((struct fdesc *)old_sys_clone)->ip; 371 sys_call_table[__NR_clone2 - __NR_ni_syscall] = 372 ((struct fdesc *)old_sys_clone2)->ip; 373 sys_call_table[__NR_execve - __NR_ni_syscall] = 374 ((struct fdesc *)old_sys_execve)->ip; 375 sys_call_table[__NR_mmap - __NR_ni_syscall] = 376 ((struct fdesc *)old_sys_mmap)->ip; 377 sys_call_table[__NR_mmap2 - __NR_ni_syscall] = 378 ((struct fdesc *)old_sys_mmap2)->ip; 379 sys_call_table[__NR_init_module - __NR_ni_syscall] = 380 ((struct fdesc *)old_sys_init_module)->ip; 381 sys_call_table[__NR_exit - __NR_ni_syscall] = 382 ((struct fdesc *)old_sys_exit)->ip; 383 } 384