      1 #ifndef _LINUX_SCHED_H
      2 #define _LINUX_SCHED_H
      3 
      4 #include <linux/auxvec.h>	/* For AT_VECTOR_SIZE */
      5 
      6 /*
      7  * cloning flags:
      8  */
      9 #define CSIGNAL		0x000000ff	/* signal mask to be sent at exit */
     10 #define CLONE_VM	0x00000100	/* set if VM shared between processes */
     11 #define CLONE_FS	0x00000200	/* set if fs info shared between processes */
     12 #define CLONE_FILES	0x00000400	/* set if open files shared between processes */
     13 #define CLONE_SIGHAND	0x00000800	/* set if signal handlers and blocked signals shared */
     14 #define CLONE_PTRACE	0x00002000	/* set if we want to let tracing continue on the child too */
     15 #define CLONE_VFORK	0x00004000	/* set if the parent wants the child to wake it up on mm_release */
     16 #define CLONE_PARENT	0x00008000	/* set if we want to have the same parent as the cloner */
     17 #define CLONE_THREAD	0x00010000	/* Same thread group? */
     18 #define CLONE_NEWNS	0x00020000	/* New namespace group? */
     19 #define CLONE_SYSVSEM	0x00040000	/* share system V SEM_UNDO semantics */
     20 #define CLONE_SETTLS	0x00080000	/* create a new TLS for the child */
     21 #define CLONE_PARENT_SETTID	0x00100000	/* set the TID in the parent */
     22 #define CLONE_CHILD_CLEARTID	0x00200000	/* clear the TID in the child */
     23 #define CLONE_DETACHED		0x00400000	/* Unused, ignored */
     24 #define CLONE_UNTRACED		0x00800000	/* set if the tracing process can't force CLONE_PTRACE on this clone */
     25 #define CLONE_CHILD_SETTID	0x01000000	/* set the TID in the child */
     26 #define CLONE_STOPPED		0x02000000	/* Start in stopped state */
     27 
     28 /*
     29  * Scheduling policies
     30  */
     31 #define SCHED_NORMAL		0
     32 #define SCHED_FIFO		1
     33 #define SCHED_RR		2
     34 #define SCHED_BATCH		3
     35 
     36 #ifdef __KERNEL__
     37 
     38 struct sched_param {
     39 	int sched_priority;
     40 };
     41 
     42 #include <asm/param.h>	/* for HZ */
     43 
     44 #include <linux/capability.h>
     45 #include <linux/threads.h>
     46 #include <linux/kernel.h>
     47 #include <linux/types.h>
     48 #include <linux/timex.h>
     49 #include <linux/jiffies.h>
     50 #include <linux/rbtree.h>
     51 #include <linux/thread_info.h>
     52 #include <linux/cpumask.h>
     53 #include <linux/errno.h>
     54 #include <linux/nodemask.h>
     55 
     56 #include <asm/system.h>
     57 #include <asm/semaphore.h>
     58 #include <asm/page.h>
     59 #include <asm/ptrace.h>
     60 #include <asm/mmu.h>
     61 #include <asm/cputime.h>
     62 
     63 #include <linux/smp.h>
     64 #include <linux/sem.h>
     65 #include <linux/signal.h>
     66 #include <linux/securebits.h>
     67 #include <linux/fs_struct.h>
     68 #include <linux/compiler.h>
     69 #include <linux/completion.h>
     70 #include <linux/pid.h>
     71 #include <linux/percpu.h>
     72 #include <linux/topology.h>
     73 #include <linux/seccomp.h>
     74 #include <linux/rcupdate.h>
     75 #include <linux/futex.h>
     76 #include <linux/rtmutex.h>
     77 
     78 #include <linux/time.h>
     79 #include <linux/param.h>
     80 #include <linux/resource.h>
     81 #include <linux/timer.h>
     82 #include <linux/hrtimer.h>
     83 
     84 #include <asm/processor.h>
     85 
     86 struct exec_domain;
     87 struct futex_pi_state;
     88 
     89 /*
     90  * List of flags we want to share for kernel threads,
     91  * if only because they are not used by them anyway.
     92  */
     93 #define CLONE_KERNEL	(CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
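        /*
         * Illustrative use (a sketch, not part of this header): kernel threads
         * are commonly spawned with these flags, e.g.
         *
         *	kernel_thread(my_thread_fn, my_data, CLONE_KERNEL | SIGCHLD);
         *
         * my_thread_fn and my_data are hypothetical names; kernel_thread()
         * itself is declared by the architecture code, not here.
         */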
     94 
     95 /*
     96  * These are the constants used to fake the fixed-point load-average
     97  * counting. Some notes:
     98  *  - 11 bit fractions expand to 22 bits by the multiplies: this gives
     99  *    a load-average precision of 10 bits integer + 11 bits fractional
    100  *  - if you want to count load-averages more often, you need more
    101  *    precision, or rounding will get you. With 2-second counting freq,
    102  *    the EXP_n values would be 1981, 2034 and 2043 if still using only
    103  *    11 bit fractions.
    104  */
    105 extern unsigned long avenrun[];		/* Load averages */
    106 
    107 #define FSHIFT		11		/* nr of bits of precision */
    108 #define FIXED_1		(1<<FSHIFT)	/* 1.0 as fixed-point */
    109 #define LOAD_FREQ	(5*HZ)		/* 5 sec intervals */
    110 #define EXP_1		1884		/* 1/exp(5sec/1min) as fixed-point */
    111 #define EXP_5		2014		/* 1/exp(5sec/5min) */
    112 #define EXP_15		2037		/* 1/exp(5sec/15min) */
    113 
    114 #define CALC_LOAD(load,exp,n) \
    115 	load *= exp; \
    116 	load += n*(FIXED_1-exp); \
    117 	load >>= FSHIFT;
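        /*
         * Roughly how the timer code applies CALC_LOAD every LOAD_FREQ ticks
         * (illustrative sketch; count_active_tasks() stands in for however the
         * caller obtains the number of runnable + uninterruptible tasks,
         * scaled by FIXED_1):
         *
         *	static int count = LOAD_FREQ;
         *
         *	count -= ticks;
         *	if (count < 0) {
         *		count += LOAD_FREQ;
         *		active_tasks = count_active_tasks();
         *		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
         *		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
         *		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
         *	}
         */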
    118 
    119 extern unsigned long total_forks;
    120 extern int nr_threads;
    121 extern int last_pid;
    122 DECLARE_PER_CPU(unsigned long, process_counts);
    123 extern int nr_processes(void);
    124 extern unsigned long nr_running(void);
    125 extern unsigned long nr_uninterruptible(void);
    126 extern unsigned long nr_active(void);
    127 extern unsigned long nr_iowait(void);
    128 extern unsigned long weighted_cpuload(const int cpu);
    129 
    130 
    131 /*
    132  * Task state bitmask. NOTE! These bits are also
    133  * encoded in fs/proc/array.c: get_task_state().
    134  *
    135  * We have two separate sets of flags: task->state
    136  * is about runnability, while task->exit_state is
    137  * about the task exiting. Confusing, but this way
    138  * modifying one set can't modify the other one by
    139  * mistake.
    140  */
    141 #define TASK_RUNNING		0
    142 #define TASK_INTERRUPTIBLE	1
    143 #define TASK_UNINTERRUPTIBLE	2
    144 #define TASK_STOPPED		4
    145 #define TASK_TRACED		8
    146 /* in tsk->exit_state */
    147 #define EXIT_ZOMBIE		16
    148 #define EXIT_DEAD		32
    149 /* in tsk->state again */
    150 #define TASK_NONINTERACTIVE	64
    151 
    152 #define __set_task_state(tsk, state_value)		\
    153 	do { (tsk)->state = (state_value); } while (0)
    154 #define set_task_state(tsk, state_value)		\
    155 	set_mb((tsk)->state, (state_value))
    156 
    157 /*
    158  * set_current_state() includes a barrier so that the write of current->state
    159  * is correctly serialised wrt the caller's subsequent test of whether to
    160  * actually sleep:
    161  *
    162  *	set_current_state(TASK_UNINTERRUPTIBLE);
    163  *	if (do_i_need_to_sleep())
    164  *		schedule();
    165  *
    166  * If the caller does not need such serialisation then use __set_current_state()
    167  */
    168 #define __set_current_state(state_value)			\
    169 	do { current->state = (state_value); } while (0)
    170 #define set_current_state(state_value)		\
    171 	set_mb(current->state, (state_value))
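        /*
         * A complete (illustrative) wait loop built on the helpers above;
         * 'condition' is whatever event the caller is waiting for:
         *
         *	for (;;) {
         *		set_current_state(TASK_INTERRUPTIBLE);
         *		if (condition)
         *			break;
         *		schedule();
         *	}
         *	__set_current_state(TASK_RUNNING);
         */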
    172 
    173 /* Task command name length */
    174 #define TASK_COMM_LEN 16
    175 
    176 #include <linux/spinlock.h>
    177 
    178 /*
    179  * This serializes "schedule()" and also protects
    180  * the run-queue from deletions/modifications (but
    181  * _adding_ to the beginning of the run-queue has
    182  * a separate lock).
    183  */
    184 extern rwlock_t tasklist_lock;
    185 extern spinlock_t mmlist_lock;
    186 
    187 struct task_struct;
    188 
    189 extern void sched_init(void);
    190 extern void sched_init_smp(void);
    191 extern void init_idle(struct task_struct *idle, int cpu);
    192 
    193 extern cpumask_t nohz_cpu_mask;
    194 
    195 extern void show_state(void);
    196 extern void show_regs(struct pt_regs *);
    197 
    198 /*
    199  * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
    200  * task), SP is the stack pointer of the first frame that should be shown in the back
    201  * trace (or NULL if the entire call-chain of the task should be shown).
    202  */
    203 extern void show_stack(struct task_struct *task, unsigned long *sp);
    204 
    205 void io_schedule(void);
    206 long io_schedule_timeout(long timeout);
    207 
    208 extern void cpu_init (void);
    209 extern void trap_init(void);
    210 extern void update_process_times(int user);
    211 extern void scheduler_tick(void);
    212 
    213 #ifdef CONFIG_DETECT_SOFTLOCKUP
    214 extern void softlockup_tick(void);
    215 extern void spawn_softlockup_task(void);
    216 extern void touch_softlockup_watchdog(void);
    217 #else
    218 static inline void softlockup_tick(void)
    219 {
    220 }
    221 static inline void spawn_softlockup_task(void)
    222 {
    223 }
    224 static inline void touch_softlockup_watchdog(void)
    225 {
    226 }
    227 #endif
    228 
    229 
    230 /* Attach to any functions which should be ignored in wchan output. */
    231 #define __sched		__attribute__((__section__(".sched.text")))
    232 /* Is this address in the __sched functions? */
    233 extern int in_sched_functions(unsigned long addr);
    234 
    235 #define	MAX_SCHEDULE_TIMEOUT	LONG_MAX
    236 extern signed long FASTCALL(schedule_timeout(signed long timeout));
    237 extern signed long schedule_timeout_interruptible(signed long timeout);
    238 extern signed long schedule_timeout_uninterruptible(signed long timeout);
    239 asmlinkage void schedule(void);
    240 
    241 struct namespace;
    242 
    243 /* Maximum number of active map areas. This is an arbitrary (large) default. */
    244 #define DEFAULT_MAX_MAP_COUNT	65536
    245 
    246 extern int sysctl_max_map_count;
    247 
    248 #include <linux/aio.h>
    249 
    250 extern unsigned long
    251 arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
    252 		       unsigned long, unsigned long);
    253 extern unsigned long
    254 arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
    255 			  unsigned long len, unsigned long pgoff,
    256 			  unsigned long flags);
    257 extern void arch_unmap_area(struct mm_struct *, unsigned long);
    258 extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
    259 
    260 #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
    261 /*
    262  * The mm counters are not protected by the mm's page_table_lock,
    263  * so they must be incremented atomically.
    264  */
    265 #define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value)
    266 #define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member))
    267 #define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member)
    268 #define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
    269 #define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
    270 typedef atomic_long_t mm_counter_t;
    271 
    272 #else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
    273 /*
    274  * The mm counters are protected by the mm's page_table_lock,
    275  * so they can be incremented directly.
    276  */
    277 #define set_mm_counter(mm, member, value) (mm)->_##member = (value)
    278 #define get_mm_counter(mm, member) ((mm)->_##member)
    279 #define add_mm_counter(mm, member, value) (mm)->_##member += (value)
    280 #define inc_mm_counter(mm, member) (mm)->_##member++
    281 #define dec_mm_counter(mm, member) (mm)->_##member--
    282 typedef unsigned long mm_counter_t;
    283 
    284 #endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
    285 
    286 #define get_mm_rss(mm)					\
    287 	(get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
    288 #define update_hiwater_rss(mm)	do {			\
    289 	unsigned long _rss = get_mm_rss(mm);		\
    290 	if ((mm)->hiwater_rss < _rss)			\
    291 		(mm)->hiwater_rss = _rss;		\
    292 } while (0)
    293 #define update_hiwater_vm(mm)	do {			\
    294 	if ((mm)->hiwater_vm < (mm)->total_vm)		\
    295 		(mm)->hiwater_vm = (mm)->total_vm;	\
    296 } while (0)
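        /*
         * Illustrative use of the counter helpers above, e.g. after a fault
         * handler has mapped a new anonymous page into 'mm' (sketch only):
         *
         *	inc_mm_counter(mm, anon_rss);
         *	update_hiwater_rss(mm);
         *	rss_pages = get_mm_rss(mm);
         */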
    297 
    298 struct mm_struct {
    299 	struct vm_area_struct * mmap;		/* list of VMAs */
    300 	struct rb_root mm_rb;
    301 	struct vm_area_struct * mmap_cache;	/* last find_vma result */
    302 	unsigned long (*get_unmapped_area) (struct file *filp,
    303 				unsigned long addr, unsigned long len,
    304 				unsigned long pgoff, unsigned long flags);
    305 	void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
    306 	unsigned long mmap_base;		/* base of mmap area */
    307 	unsigned long task_size;		/* size of task vm space */
    308 	unsigned long cached_hole_size;         /* if non-zero, the largest hole below free_area_cache */
    309 	unsigned long free_area_cache;		/* first hole of size cached_hole_size or larger */
    310 	pgd_t * pgd;
    311 	atomic_t mm_users;			/* How many users with user space? */
    312 	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
    313 	int map_count;				/* number of VMAs */
    314 	struct rw_semaphore mmap_sem;
    315 	spinlock_t page_table_lock;		/* Protects page tables and some counters */
    316 
    317 	struct list_head mmlist;		/* List of maybe swapped mm's.  These are globally strung
    318 						 * together off init_mm.mmlist, and are protected
    319 						 * by mmlist_lock
    320 						 */
    321 
    322 	/* Special counters, in some configurations protected by the
    323 	 * page_table_lock, in other configurations by being atomic.
    324 	 */
    325 	mm_counter_t _file_rss;
    326 	mm_counter_t _anon_rss;
    327 
    328 	unsigned long hiwater_rss;	/* High-watermark of RSS usage */
    329 	unsigned long hiwater_vm;	/* High-water virtual memory usage */
    330 
    331 	unsigned long total_vm, locked_vm, shared_vm, exec_vm;
    332 	unsigned long stack_vm, reserved_vm, def_flags, nr_ptes;
    333 	unsigned long start_code, end_code, start_data, end_data;
    334 	unsigned long start_brk, brk, start_stack;
    335 	unsigned long arg_start, arg_end, env_start, env_end;
    336 
    337 	unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
    338 
    339 	unsigned dumpable:2;
    340 	cpumask_t cpu_vm_mask;
    341 
    342 	/* Architecture-specific MM context */
    343 	mm_context_t context;
    344 
    345 	/* Token based thrashing protection. */
    346 	unsigned long swap_token_time;
    347 	char recent_pagein;
    348 
    349 	/* coredumping support */
    350 	int core_waiters;
    351 	struct completion *core_startup_done, core_done;
    352 
    353 	/* aio bits */
    354 	rwlock_t		ioctx_list_lock;
    355 	struct kioctx		*ioctx_list;
    356 };
    357 
    358 struct sighand_struct {
    359 	atomic_t		count;
    360 	struct k_sigaction	action[_NSIG];
    361 	spinlock_t		siglock;
    362 };
    363 
    364 struct pacct_struct {
    365 	int			ac_flag;
    366 	long			ac_exitcode;
    367 	unsigned long		ac_mem;
    368 	cputime_t		ac_utime, ac_stime;
    369 	unsigned long		ac_minflt, ac_majflt;
    370 };
    371 
    372 /*
    373  * NOTE! "signal_struct" does not have its own
    374  * locking, because a shared signal_struct always
    375  * implies a shared sighand_struct, so locking
    376  * sighand_struct is always a proper superset of
    377  * the locking of signal_struct.
    378  */
    379 struct signal_struct {
    380 	atomic_t		count;
    381 	atomic_t		live;
    382 
    383 	wait_queue_head_t	wait_chldexit;	/* for wait4() */
    384 
    385 	/* current thread group signal load-balancing target: */
    386 	struct task_struct	*curr_target;
    387 
    388 	/* shared signal handling: */
    389 	struct sigpending	shared_pending;
    390 
    391 	/* thread group exit support */
    392 	int			group_exit_code;
    393 	/* overloaded:
    394 	 * - notify group_exit_task when ->count is equal to notify_count
    395 	 * - everyone except group_exit_task is stopped during signal delivery
    396 	 *   of fatal signals, group_exit_task processes the signal.
    397 	 */
    398 	struct task_struct	*group_exit_task;
    399 	int			notify_count;
    400 
    401 	/* thread group stop support, overloads group_exit_code too */
    402 	int			group_stop_count;
    403 	unsigned int		flags; /* see SIGNAL_* flags below */
    404 
    405 	/* POSIX.1b Interval Timers */
    406 	struct list_head posix_timers;
    407 
    408 	/* ITIMER_REAL timer for the process */
    409 	struct hrtimer real_timer;
    410 	struct task_struct *tsk;
    411 	ktime_t it_real_incr;
    412 
    413 	/* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */
    414 	cputime_t it_prof_expires, it_virt_expires;
    415 	cputime_t it_prof_incr, it_virt_incr;
    416 
    417 	/* job control IDs */
    418 	pid_t pgrp;
    419 	pid_t tty_old_pgrp;
    420 	pid_t session;
    421 	/* boolean value for session group leader */
    422 	int leader;
    423 
    424 	struct tty_struct *tty; /* NULL if no tty */
    425 
    426 	/*
    427 	 * Cumulative resource counters for dead threads in the group,
    428 	 * and for reaped dead child processes forked by this group.
    429 	 * Live threads maintain their own counters and add to these
    430 	 * in __exit_signal, except for the group leader.
    431 	 */
    432 	cputime_t utime, stime, cutime, cstime;
    433 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
    434 	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
    435 
    436 	/*
    437 	 * Cumulative ns of scheduled CPU time for dead threads in the
    438 	 * group, not including a zombie group leader.  (This only differs
    439 	 * from jiffies_to_ns(utime + stime) if sched_clock uses something
    440 	 * other than jiffies.)
    441 	 */
    442 	unsigned long long sched_time;
    443 
    444 	/*
    445 	 * We don't bother to synchronize most readers of this at all,
    446 	 * because there is no reader checking a limit that actually needs
    447 	 * to get both rlim_cur and rlim_max atomically, and either one
    448 	 * alone is a single word that can safely be read normally.
    449 	 * getrlimit/setrlimit use task_lock(current->group_leader) to
    450 	 * protect this instead of the siglock, because they really
    451 	 * have no need to disable irqs.
    452 	 */
    453 	struct rlimit rlim[RLIM_NLIMITS];
    454 
    455 	struct list_head cpu_timers[3];
    456 
    457 	/* keep the process-shared keyrings here so that they do the right
    458 	 * thing in threads created with CLONE_THREAD */
    459 #ifdef CONFIG_KEYS
    460 	struct key *session_keyring;	/* keyring inherited over fork */
    461 	struct key *process_keyring;	/* keyring private to this process */
    462 #endif
    463 #ifdef CONFIG_BSD_PROCESS_ACCT
    464 	struct pacct_struct pacct;	/* per-process accounting information */
    465 #endif
    466 #ifdef CONFIG_TASKSTATS
    467 	spinlock_t stats_lock;
    468 	struct taskstats *stats;
    469 #endif
    470 };
    471 
    472 /* Context switch must be unlocked if interrupts are to be enabled */
    473 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
    474 # define __ARCH_WANT_UNLOCKED_CTXSW
    475 #endif
    476 
    477 /*
    478  * Bits in flags field of signal_struct.
    479  */
    480 #define SIGNAL_STOP_STOPPED	0x00000001 /* job control stop in effect */
    481 #define SIGNAL_STOP_DEQUEUED	0x00000002 /* stop signal dequeued */
    482 #define SIGNAL_STOP_CONTINUED	0x00000004 /* SIGCONT since WCONTINUED reap */
    483 #define SIGNAL_GROUP_EXIT	0x00000008 /* group exit in progress */
    484 
    485 
    486 /*
    487  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
    488  * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
    489  * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
    490  * values are inverted: lower p->prio value means higher priority.
    491  *
    492  * The MAX_USER_RT_PRIO value allows the actual maximum
    493  * RT priority to be separate from the value exported to
    494  * user-space.  This allows kernel threads to set their
    495  * priority to a value higher than any user task. Note:
    496  * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
    497  */
    498 
    499 #define MAX_USER_RT_PRIO	100
    500 #define MAX_RT_PRIO		MAX_USER_RT_PRIO
    501 
    502 #define MAX_PRIO		(MAX_RT_PRIO + 40)
    503 
    504 #define rt_prio(prio)		unlikely((prio) < MAX_RT_PRIO)
    505 #define rt_task(p)		rt_prio((p)->prio)
    506 #define batch_task(p)		(unlikely((p)->policy == SCHED_BATCH))
    507 #define has_rt_policy(p) \
    508 	unlikely((p)->policy != SCHED_NORMAL && (p)->policy != SCHED_BATCH)
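        /*
         * Worked example: with MAX_RT_PRIO == 100 and MAX_PRIO == 140, a
         * SCHED_NORMAL task's static priority is MAX_RT_PRIO + 20 + nice
         * (see NICE_TO_PRIO in kernel/sched.c), so nice -20..19 maps onto
         * p->static_prio 100..139 and a nice-0 task sits at 120.  rt_prio()
         * is therefore true only for priorities below 100.
         */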
    509 
    510 /*
    511  * Some day this will be a full-fledged user tracking system.
    512  */
    513 struct user_struct {
    514 	atomic_t __count;	/* reference count */
    515 	atomic_t processes;	/* How many processes does this user have? */
    516 	atomic_t files;		/* How many open files does this user have? */
    517 	atomic_t sigpending;	/* How many pending signals does this user have? */
    518 #ifdef CONFIG_INOTIFY_USER
    519 	atomic_t inotify_watches; /* How many inotify watches does this user have? */
    520 	atomic_t inotify_devs;	/* How many inotify devs does this user have opened? */
    521 #endif
    522 	/* protected by mq_lock	*/
    523 	unsigned long mq_bytes;	/* How many bytes can be allocated to mqueue? */
    524 	unsigned long locked_shm; /* How many pages of mlocked shm? */
    525 
    526 #ifdef CONFIG_KEYS
    527 	struct key *uid_keyring;	/* UID specific keyring */
    528 	struct key *session_keyring;	/* UID's default session keyring */
    529 #endif
    530 
    531 	/* Hash table maintenance information */
    532 	struct list_head uidhash_list;
    533 	uid_t uid;
    534 };
    535 
    536 extern struct user_struct *find_user(uid_t);
    537 
    538 extern struct user_struct root_user;
    539 #define INIT_USER (&root_user)
    540 
    541 struct backing_dev_info;
    542 struct reclaim_state;
    543 
    544 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
    545 struct sched_info {
    546 	/* cumulative counters */
    547 	unsigned long	cpu_time,	/* time spent on the cpu */
    548 			run_delay,	/* time spent waiting on a runqueue */
    549 			pcnt;		/* # of timeslices run on this cpu */
    550 
    551 	/* timestamps */
    552 	unsigned long	last_arrival,	/* when we last ran on a cpu */
    553 			last_queued;	/* when we were last queued to run */
    554 };
    555 #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
    556 
    557 #ifdef CONFIG_SCHEDSTATS
    558 extern struct file_operations proc_schedstat_operations;
    559 #endif /* CONFIG_SCHEDSTATS */
    560 
    561 #ifdef CONFIG_TASK_DELAY_ACCT
    562 struct task_delay_info {
    563 	spinlock_t	lock;
    564 	unsigned int	flags;	/* Private per-task flags */
    565 
    566 	/* For each stat XXX, add following, aligned appropriately
    567 	 *
    568 	 * struct timespec XXX_start, XXX_end;
    569 	 * u64 XXX_delay;
    570 	 * u32 XXX_count;
    571 	 *
    572 	 * Atomicity of updates to XXX_delay, XXX_count protected by
    573 	 * single lock above (split into XXX_lock if contention is an issue).
    574 	 */
    575 
    576 	/*
    577 	 * XXX_count is incremented on every XXX operation, the delay
    578 	 * associated with the operation is added to XXX_delay.
    579 	 * XXX_delay contains the accumulated delay time in nanoseconds.
    580 	 */
    581 	struct timespec blkio_start, blkio_end;	/* Shared by blkio, swapin */
    582 	u64 blkio_delay;	/* wait for sync block io completion */
    583 	u64 swapin_delay;	/* wait for swapin block io completion */
    584 	u32 blkio_count;	/* total count of the number of sync block */
    585 				/* io operations performed */
    586 	u32 swapin_count;	/* total count of the number of swapin block */
    587 				/* io operations performed */
    588 };
    589 #endif	/* CONFIG_TASK_DELAY_ACCT */
    590 
    591 static inline int sched_info_on(void)
    592 {
    593 #ifdef CONFIG_SCHEDSTATS
    594 	return 1;
    595 #elif defined(CONFIG_TASK_DELAY_ACCT)
    596 	extern int delayacct_on;
    597 	return delayacct_on;
    598 #else
    599 	return 0;
    600 #endif
    601 }
    602 
    603 enum idle_type
    604 {
    605 	SCHED_IDLE,
    606 	NOT_IDLE,
    607 	NEWLY_IDLE,
    608 	MAX_IDLE_TYPES
    609 };
    610 
    611 /*
    612  * sched-domains (multiprocessor balancing) declarations:
    613  */
    614 #define SCHED_LOAD_SCALE	128UL	/* increase resolution of load */
    615 
    616 #ifdef CONFIG_SMP
    617 #define SD_LOAD_BALANCE		1	/* Do load balancing on this domain. */
    618 #define SD_BALANCE_NEWIDLE	2	/* Balance when about to become idle */
    619 #define SD_BALANCE_EXEC		4	/* Balance on exec */
    620 #define SD_BALANCE_FORK		8	/* Balance on fork, clone */
    621 #define SD_WAKE_IDLE		16	/* Wake to idle CPU on task wakeup */
    622 #define SD_WAKE_AFFINE		32	/* Wake task to waking CPU */
    623 #define SD_WAKE_BALANCE		64	/* Perform balancing at task wakeup */
    624 #define SD_SHARE_CPUPOWER	128	/* Domain members share cpu power */
    625 #define SD_POWERSAVINGS_BALANCE	256	/* Balance for power savings */
    626 
    627 #define BALANCE_FOR_POWER	((sched_mc_power_savings || sched_smt_power_savings) \
    628 				 ? SD_POWERSAVINGS_BALANCE : 0)
    629 
    630 
    631 struct sched_group {
    632 	struct sched_group *next;	/* Must be a circular list */
    633 	cpumask_t cpumask;
    634 
    635 	/*
    636 	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
    637 	 * single CPU. This is read only (except for setup, hotplug CPU).
    638 	 */
    639 	unsigned long cpu_power;
    640 };
    641 
    642 struct sched_domain {
    643 	/* These fields must be setup */
    644 	struct sched_domain *parent;	/* top domain must be null terminated */
    645 	struct sched_group *groups;	/* the balancing groups of the domain */
    646 	cpumask_t span;			/* span of all CPUs in this domain */
    647 	unsigned long min_interval;	/* Minimum balance interval ms */
    648 	unsigned long max_interval;	/* Maximum balance interval ms */
    649 	unsigned int busy_factor;	/* less balancing by factor if busy */
    650 	unsigned int imbalance_pct;	/* No balance until over watermark */
    651 	unsigned long long cache_hot_time; /* Task considered cache hot (ns) */
    652 	unsigned int cache_nice_tries;	/* Leave cache hot tasks for # tries */
    653 	unsigned int per_cpu_gain;	/* CPU % gained by adding domain cpus */
    654 	unsigned int busy_idx;
    655 	unsigned int idle_idx;
    656 	unsigned int newidle_idx;
    657 	unsigned int wake_idx;
    658 	unsigned int forkexec_idx;
    659 	int flags;			/* See SD_* */
    660 
    661 	/* Runtime fields. */
    662 	unsigned long last_balance;	/* init to jiffies. units in jiffies */
    663 	unsigned int balance_interval;	/* initialise to 1. units in ms. */
    664 	unsigned int nr_balance_failed; /* initialise to 0 */
    665 
    666 #ifdef CONFIG_SCHEDSTATS
    667 	/* load_balance() stats */
    668 	unsigned long lb_cnt[MAX_IDLE_TYPES];
    669 	unsigned long lb_failed[MAX_IDLE_TYPES];
    670 	unsigned long lb_balanced[MAX_IDLE_TYPES];
    671 	unsigned long lb_imbalance[MAX_IDLE_TYPES];
    672 	unsigned long lb_gained[MAX_IDLE_TYPES];
    673 	unsigned long lb_hot_gained[MAX_IDLE_TYPES];
    674 	unsigned long lb_nobusyg[MAX_IDLE_TYPES];
    675 	unsigned long lb_nobusyq[MAX_IDLE_TYPES];
    676 
    677 	/* Active load balancing */
    678 	unsigned long alb_cnt;
    679 	unsigned long alb_failed;
    680 	unsigned long alb_pushed;
    681 
    682 	/* SD_BALANCE_EXEC stats */
    683 	unsigned long sbe_cnt;
    684 	unsigned long sbe_balanced;
    685 	unsigned long sbe_pushed;
    686 
    687 	/* SD_BALANCE_FORK stats */
    688 	unsigned long sbf_cnt;
    689 	unsigned long sbf_balanced;
    690 	unsigned long sbf_pushed;
    691 
    692 	/* try_to_wake_up() stats */
    693 	unsigned long ttwu_wake_remote;
    694 	unsigned long ttwu_move_affine;
    695 	unsigned long ttwu_move_balance;
    696 #endif
    697 };
    698 
    699 extern int partition_sched_domains(cpumask_t *partition1,
    700 				    cpumask_t *partition2);
    701 
    702 /*
    703  * Maximum cache size the migration-costs auto-tuning code will
    704  * search from:
    705  */
    706 extern unsigned int max_cache_size;
    707 
    708 #endif	/* CONFIG_SMP */
    709 
    710 
    711 struct io_context;			/* See blkdev.h */
    712 void exit_io_context(void);
    713 struct cpuset;
    714 
    715 #define NGROUPS_SMALL		32
    716 #define NGROUPS_PER_BLOCK	((int)(PAGE_SIZE / sizeof(gid_t)))
    717 struct group_info {
    718 	int ngroups;
    719 	atomic_t usage;
    720 	gid_t small_block[NGROUPS_SMALL];
    721 	int nblocks;
    722 	gid_t *blocks[0];
    723 };
    724 
    725 /*
    726  * get_group_info() must be called with the owning task locked (via task_lock())
    727  * when task != current, because the vast majority of callers are
    728  * looking at current->group_info, which cannot be changed except by the
    729  * current task.  Changing current->group_info requires the task lock, too.
    730  */
    731 #define get_group_info(group_info) do { \
    732 	atomic_inc(&(group_info)->usage); \
    733 } while (0)
    734 
    735 #define put_group_info(group_info) do { \
    736 	if (atomic_dec_and_test(&(group_info)->usage)) \
    737 		groups_free(group_info); \
    738 } while (0)
    739 
    740 extern struct group_info *groups_alloc(int gidsetsize);
    741 extern void groups_free(struct group_info *group_info);
    742 extern int set_current_groups(struct group_info *group_info);
    743 extern int groups_search(struct group_info *group_info, gid_t grp);
    744 /* access the groups "array" with this macro */
    745 #define GROUP_AT(gi, i) \
    746     ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK])
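        /*
         * Illustrative pattern for reading another task's groups (sketch;
         * 'task' and 'gid' are whatever the caller already has).  Once the
         * group_info is pinned, groups_search() or GROUP_AT() can be used on
         * it without holding the task lock:
         *
         *	struct group_info *gi;
         *
         *	task_lock(task);
         *	gi = task->group_info;
         *	get_group_info(gi);
         *	task_unlock(task);
         *
         *	in_group = groups_search(gi, gid);
         *	put_group_info(gi);
         */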
    747 
    748 #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
    749 extern void prefetch_stack(struct task_struct *t);
    750 #else
    751 static inline void prefetch_stack(struct task_struct *t) { }
    752 #endif
    753 
    754 struct audit_context;		/* See audit.c */
    755 struct mempolicy;
    756 struct pipe_inode_info;
    757 
    758 enum sleep_type {
    759 	SLEEP_NORMAL,
    760 	SLEEP_NONINTERACTIVE,
    761 	SLEEP_INTERACTIVE,
    762 	SLEEP_INTERRUPTED,
    763 };
    764 
    765 struct prio_array;
    766 
    767 struct task_struct {
    768 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
    769 	struct thread_info *thread_info;
    770 	atomic_t usage;
    771 	unsigned long flags;	/* per process flags, defined below */
    772 	unsigned long ptrace;
    773 
    774 	int lock_depth;		/* BKL lock depth */
    775 
    776 #ifdef CONFIG_SMP
    777 #ifdef __ARCH_WANT_UNLOCKED_CTXSW
    778 	int oncpu;
    779 #endif
    780 #endif
    781 	int load_weight;	/* for niceness load balancing purposes */
    782 	int prio, static_prio, normal_prio;
    783 	struct list_head run_list;
    784 	struct prio_array *array;
    785 
    786 	unsigned short ioprio;
    787 	unsigned int btrace_seq;
    788 
    789 	unsigned long sleep_avg;
    790 	unsigned long long timestamp, last_ran;
    791 	unsigned long long sched_time; /* sched_clock time spent running */
    792 	enum sleep_type sleep_type;
    793 
    794 	unsigned long policy;
    795 	cpumask_t cpus_allowed;
    796 	unsigned int time_slice, first_time_slice;
    797 
    798 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
    799 	struct sched_info sched_info;
    800 #endif
    801 
    802 	struct list_head tasks;
    803 	/*
    804 	 * ptrace_list/ptrace_children forms the list of my children
    805 	 * that were stolen by a ptracer.
    806 	 */
    807 	struct list_head ptrace_children;
    808 	struct list_head ptrace_list;
    809 
    810 	struct mm_struct *mm, *active_mm;
    811 
    812 /* task state */
    813 	struct linux_binfmt *binfmt;
    814 	long exit_state;
    815 	int exit_code, exit_signal;
    816 	int pdeath_signal;  /*  The signal sent when the parent dies  */
    817 	/* ??? */
    818 	unsigned long personality;
    819 	unsigned did_exec:1;
    820 	pid_t pid;
    821 	pid_t tgid;
    822 	/*
    823 	 * pointers to (original) parent process, youngest child, younger sibling,
    824 	 * older sibling, respectively.  (p->father can be replaced with
    825 	 * p->parent->pid)
    826 	 */
    827 	struct task_struct *real_parent; /* real parent process (when being debugged) */
    828 	struct task_struct *parent;	/* parent process */
    829 	/*
    830 	 * children/sibling forms the list of my children plus the
    831 	 * tasks I'm ptracing.
    832 	 */
    833 	struct list_head children;	/* list of my children */
    834 	struct list_head sibling;	/* linkage in my parent's children list */
    835 	struct task_struct *group_leader;	/* threadgroup leader */
    836 
    837 	/* PID/PID hash table linkage. */
    838 	struct pid_link pids[PIDTYPE_MAX];
    839 	struct list_head thread_group;
    840 
    841 	struct completion *vfork_done;		/* for vfork() */
    842 	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
    843 	int __user *clear_child_tid;		/* CLONE_CHILD_CLEARTID */
    844 
    845 	unsigned long rt_priority;
    846 	cputime_t utime, stime;
    847 	unsigned long nvcsw, nivcsw; /* context switch counts */
    848 	struct timespec start_time;
    849 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
    850 	unsigned long min_flt, maj_flt;
    851 
    852   	cputime_t it_prof_expires, it_virt_expires;
    853 	unsigned long long it_sched_expires;
    854 	struct list_head cpu_timers[3];
    855 
    856 /* process credentials */
    857 	uid_t uid,euid,suid,fsuid;
    858 	gid_t gid,egid,sgid,fsgid;
    859 	struct group_info *group_info;
    860 	kernel_cap_t   cap_effective, cap_inheritable, cap_permitted;
    861 	unsigned keep_capabilities:1;
    862 	struct user_struct *user;
    863 #ifdef CONFIG_KEYS
    864 	struct key *request_key_auth;	/* assumed request_key authority */
    865 	struct key *thread_keyring;	/* keyring private to this thread */
    866 	unsigned char jit_keyring;	/* default keyring to attach requested keys to */
    867 #endif
    868 	int oomkilladj; /* OOM kill score adjustment (bit shift). */
    869 	char comm[TASK_COMM_LEN]; /* executable name excluding path
    870 				     - access with [gs]et_task_comm (which locks
    871 				       it with task_lock())
    872 				     - initialized normally by flush_old_exec */
    873 /* file system info */
    874 	int link_count, total_link_count;
    875 /* ipc stuff */
    876 	struct sysv_sem sysvsem;
    877 /* CPU-specific state of this task */
    878 	struct thread_struct thread;
    879 /* filesystem information */
    880 	struct fs_struct *fs;
    881 /* open file information */
    882 	struct files_struct *files;
    883 /* namespace */
    884 	struct namespace *namespace;
    885 /* signal handlers */
    886 	struct signal_struct *signal;
    887 	struct sighand_struct *sighand;
    888 
    889 	sigset_t blocked, real_blocked;
    890 	sigset_t saved_sigmask;		/* To be restored with TIF_RESTORE_SIGMASK */
    891 	struct sigpending pending;
    892 
    893 	unsigned long sas_ss_sp;
    894 	size_t sas_ss_size;
    895 	int (*notifier)(void *priv);
    896 	void *notifier_data;
    897 	sigset_t *notifier_mask;
    898 
    899 	void *security;
    900 	struct audit_context *audit_context;
    901 	seccomp_t seccomp;
    902 
    903 /* Thread group tracking */
    904    	u32 parent_exec_id;
    905    	u32 self_exec_id;
    906 /* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
    907 	spinlock_t alloc_lock;
    908 
    909 	/* Protection of the PI data structures: */
    910 	spinlock_t pi_lock;
    911 
    912 #ifdef CONFIG_RT_MUTEXES
    913 	/* PI waiters blocked on a rt_mutex held by this task */
    914 	struct plist_head pi_waiters;
    915 	/* Deadlock detection and priority inheritance handling */
    916 	struct rt_mutex_waiter *pi_blocked_on;
    917 #endif
    918 
    919 #ifdef CONFIG_DEBUG_MUTEXES
    920 	/* mutex deadlock detection */
    921 	struct mutex_waiter *blocked_on;
    922 #endif
    923 #ifdef CONFIG_TRACE_IRQFLAGS
    924 	unsigned int irq_events;
    925 	int hardirqs_enabled;
    926 	unsigned long hardirq_enable_ip;
    927 	unsigned int hardirq_enable_event;
    928 	unsigned long hardirq_disable_ip;
    929 	unsigned int hardirq_disable_event;
    930 	int softirqs_enabled;
    931 	unsigned long softirq_disable_ip;
    932 	unsigned int softirq_disable_event;
    933 	unsigned long softirq_enable_ip;
    934 	unsigned int softirq_enable_event;
    935 	int hardirq_context;
    936 	int softirq_context;
    937 #endif
    938 #ifdef CONFIG_LOCKDEP
    939 # define MAX_LOCK_DEPTH 30UL
    940 	u64 curr_chain_key;
    941 	int lockdep_depth;
    942 	struct held_lock held_locks[MAX_LOCK_DEPTH];
    943 	unsigned int lockdep_recursion;
    944 #endif
    945 
    946 /* journalling filesystem info */
    947 	void *journal_info;
    948 
    949 /* VM state */
    950 	struct reclaim_state *reclaim_state;
    951 
    952 	struct backing_dev_info *backing_dev_info;
    953 
    954 	struct io_context *io_context;
    955 
    956 	unsigned long ptrace_message;
    957 	siginfo_t *last_siginfo; /* For ptrace use.  */
    958 /*
    959  * current io wait handle: wait queue entry to use for io waits.
    960  * If this thread is processing aio, this points at the waitqueue
    961  * inside the currently handled kiocb. It may be NULL (i.e. default
    962  * to a stack based synchronous wait) if it's doing sync IO.
    963  */
    964 	wait_queue_t *io_wait;
    965 /* i/o counters (bytes read/written, #syscalls) */
    966 	u64 rchar, wchar, syscr, syscw;
    967 #if defined(CONFIG_BSD_PROCESS_ACCT)
    968 	u64 acct_rss_mem1;	/* accumulated rss usage */
    969 	u64 acct_vm_mem1;	/* accumulated virtual memory usage */
    970 	clock_t acct_stimexpd;	/* clock_t-converted stime since last update */
    971 #endif
    972 #ifdef CONFIG_NUMA
    973   	struct mempolicy *mempolicy;
    974 	short il_next;
    975 #endif
    976 #ifdef CONFIG_CPUSETS
    977 	struct cpuset *cpuset;
    978 	nodemask_t mems_allowed;
    979 	int cpuset_mems_generation;
    980 	int cpuset_mem_spread_rotor;
    981 #endif
    982 	struct robust_list_head __user *robust_list;
    983 #ifdef CONFIG_COMPAT
    984 	struct compat_robust_list_head __user *compat_robust_list;
    985 #endif
    986 	struct list_head pi_state_list;
    987 	struct futex_pi_state *pi_state_cache;
    988 
    989 	atomic_t fs_excl;	/* holding fs exclusive resources */
    990 	struct rcu_head rcu;
    991 
    992 	/*
    993 	 * cache last used pipe for splice
    994 	 */
    995 	struct pipe_inode_info *splice_pipe;
    996 #ifdef	CONFIG_TASK_DELAY_ACCT
    997 	struct task_delay_info *delays;
    998 #endif
    999 };
   1000 
   1001 static inline pid_t process_group(struct task_struct *tsk)
   1002 {
   1003 	return tsk->signal->pgrp;
   1004 }
   1005 
   1006 /**
   1007  * pid_alive - check that a task structure is not stale
   1008  * @p: Task structure to be checked.
   1009  *
   1010  * Test if a process is not yet dead (at most zombie state).
   1011  * If pid_alive fails, then pointers within the task structure
   1012  * can be stale and must not be dereferenced.
   1013  */
   1014 static inline int pid_alive(struct task_struct *p)
   1015 {
   1016 	return p->pids[PIDTYPE_PID].pid != NULL;
   1017 }
   1018 
   1019 extern void free_task(struct task_struct *tsk);
   1020 #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
   1021 
   1022 extern void __put_task_struct(struct task_struct *t);
   1023 
   1024 static inline void put_task_struct(struct task_struct *t)
   1025 {
   1026 	if (atomic_dec_and_test(&t->usage))
   1027 		__put_task_struct(t);
   1028 }
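        /*
         * Illustrative way to take a stable reference on a task looked up by
         * pid (sketch; error handling trimmed):
         *
         *	read_lock(&tasklist_lock);
         *	p = find_task_by_pid(nr);
         *	if (p)
         *		get_task_struct(p);
         *	read_unlock(&tasklist_lock);
         *	if (p) {
         *		... use p without it disappearing underneath us ...
         *		put_task_struct(p);
         *	}
         */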
   1029 
   1030 /*
   1031  * Per process flags
   1032  */
   1033 #define PF_ALIGNWARN	0x00000001	/* Print alignment warning msgs */
   1034 					/* Not implemented yet, only for 486 */
   1035 #define PF_STARTING	0x00000002	/* being created */
   1036 #define PF_EXITING	0x00000004	/* getting shut down */
   1037 #define PF_DEAD		0x00000008	/* Dead */
   1038 #define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
   1039 #define PF_SUPERPRIV	0x00000100	/* used super-user privileges */
   1040 #define PF_DUMPCORE	0x00000200	/* dumped core */
   1041 #define PF_SIGNALED	0x00000400	/* killed by a signal */
   1042 #define PF_MEMALLOC	0x00000800	/* Allocating memory */
   1043 #define PF_FLUSHER	0x00001000	/* responsible for disk writeback */
   1044 #define PF_USED_MATH	0x00002000	/* if unset the fpu must be initialized before use */
   1045 #define PF_FREEZE	0x00004000	/* this task is being frozen for suspend now */
   1046 #define PF_NOFREEZE	0x00008000	/* this thread should not be frozen */
   1047 #define PF_FROZEN	0x00010000	/* frozen for system suspend */
   1048 #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
   1049 #define PF_KSWAPD	0x00040000	/* I am kswapd */
   1050 #define PF_SWAPOFF	0x00080000	/* I am in swapoff */
   1051 #define PF_LESS_THROTTLE 0x00100000	/* Throttle me less: I clean memory */
   1052 #define PF_BORROWED_MM	0x00200000	/* I am a kthread doing use_mm */
   1053 #define PF_RANDOMIZE	0x00400000	/* randomize virtual address space */
   1054 #define PF_SWAPWRITE	0x00800000	/* Allowed to write to swap */
   1055 #define PF_SPREAD_PAGE	0x01000000	/* Spread page cache over cpuset */
   1056 #define PF_SPREAD_SLAB	0x02000000	/* Spread some slab caches over cpuset */
   1057 #define PF_MEMPOLICY	0x10000000	/* Non-default NUMA mempolicy */
   1058 #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
   1059 
   1060 /*
   1061  * Only the _current_ task can read/write to tsk->flags, but other
   1062  * tasks can access tsk->flags in readonly mode for example
   1063  * with tsk_used_math (like during threaded core dumping).
   1064  * There is however an exception to this rule during ptrace
   1065  * or during fork: the ptracer task is allowed to write to the
   1066  * child->flags of its traced child (same goes for fork, the parent
   1067  * can write to the child->flags), because we're guaranteed the
   1068  * child is not running and in turn not changing child->flags
   1069  * at the same time the parent does it.
   1070  */
   1071 #define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0)
   1072 #define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0)
   1073 #define clear_used_math() clear_stopped_child_used_math(current)
   1074 #define set_used_math() set_stopped_child_used_math(current)
   1075 #define conditional_stopped_child_used_math(condition, child) \
   1076 	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0)
   1077 #define conditional_used_math(condition) \
   1078 	conditional_stopped_child_used_math(condition, current)
   1079 #define copy_to_stopped_child_used_math(child) \
   1080 	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0)
   1081 /* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */
   1082 #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
   1083 #define used_math() tsk_used_math(current)
   1084 
   1085 #ifdef CONFIG_SMP
   1086 extern int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask);
   1087 #else
   1088 static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
   1089 {
   1090 	if (!cpu_isset(0, new_mask))
   1091 		return -EINVAL;
   1092 	return 0;
   1093 }
   1094 #endif
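        /*
         * Example (sketch): pinning a task to a single CPU.  The call fails
         * if the new mask contains no usable online CPU:
         *
         *	cpumask_t mask = cpumask_of_cpu(cpu);
         *
         *	retval = set_cpus_allowed(p, mask);
         */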
   1095 
   1096 extern unsigned long long sched_clock(void);
   1097 extern unsigned long long
   1098 current_sched_time(const struct task_struct *current_task);
   1099 
   1100 /* sched_exec is called by processes performing an exec */
   1101 #ifdef CONFIG_SMP
   1102 extern void sched_exec(void);
   1103 #else
   1104 #define sched_exec()   {}
   1105 #endif
   1106 
   1107 #ifdef CONFIG_HOTPLUG_CPU
   1108 extern void idle_task_exit(void);
   1109 #else
   1110 static inline void idle_task_exit(void) {}
   1111 #endif
   1112 
   1113 extern void sched_idle_next(void);
   1114 
   1115 #ifdef CONFIG_RT_MUTEXES
   1116 extern int rt_mutex_getprio(struct task_struct *p);
   1117 extern void rt_mutex_setprio(struct task_struct *p, int prio);
   1118 extern void rt_mutex_adjust_pi(struct task_struct *p);
   1119 #else
   1120 static inline int rt_mutex_getprio(struct task_struct *p)
   1121 {
   1122 	return p->normal_prio;
   1123 }
   1124 # define rt_mutex_adjust_pi(p)		do { } while (0)
   1125 #endif
   1126 
   1127 extern void set_user_nice(struct task_struct *p, long nice);
   1128 extern int task_prio(const struct task_struct *p);
   1129 extern int task_nice(const struct task_struct *p);
   1130 extern int can_nice(const struct task_struct *p, const int nice);
   1131 extern int task_curr(const struct task_struct *p);
   1132 extern int idle_cpu(int cpu);
   1133 extern int sched_setscheduler(struct task_struct *, int, struct sched_param *);
   1134 extern struct task_struct *idle_task(int cpu);
   1135 extern struct task_struct *curr_task(int cpu);
   1136 extern void set_curr_task(int cpu, struct task_struct *p);
   1137 
   1138 void yield(void);
   1139 
   1140 /*
   1141  * The default (Linux) execution domain.
   1142  */
   1143 extern struct exec_domain	default_exec_domain;
   1144 
   1145 union thread_union {
   1146 	struct thread_info thread_info;
   1147 	unsigned long stack[THREAD_SIZE/sizeof(long)];
   1148 };
   1149 
   1150 #ifndef __HAVE_ARCH_KSTACK_END
   1151 static inline int kstack_end(void *addr)
   1152 {
   1153 	/* Reliable end of stack detection:
   1154 	 * Some APM bios versions misalign the stack
   1155 	 */
   1156 	return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*)));
   1157 }
   1158 #endif
   1159 
   1160 extern union thread_union init_thread_union;
   1161 extern struct task_struct init_task;
   1162 
   1163 extern struct   mm_struct init_mm;
   1164 
   1165 #define find_task_by_pid(nr)	find_task_by_pid_type(PIDTYPE_PID, nr)
   1166 extern struct task_struct *find_task_by_pid_type(int type, int pid);
   1167 extern void set_special_pids(pid_t session, pid_t pgrp);
   1168 extern void __set_special_pids(pid_t session, pid_t pgrp);
   1169 
   1170 /* per-UID process charging. */
   1171 extern struct user_struct * alloc_uid(uid_t);
   1172 static inline struct user_struct *get_uid(struct user_struct *u)
   1173 {
   1174 	atomic_inc(&u->__count);
   1175 	return u;
   1176 }
   1177 extern void free_uid(struct user_struct *);
   1178 extern void switch_uid(struct user_struct *);
   1179 
   1180 #include <asm/current.h>
   1181 
   1182 extern void do_timer(struct pt_regs *);
   1183 
   1184 extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
   1185 extern int FASTCALL(wake_up_process(struct task_struct * tsk));
   1186 extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
   1187 						unsigned long clone_flags));
   1188 #ifdef CONFIG_SMP
   1189  extern void kick_process(struct task_struct *tsk);
   1190 #else
   1191  static inline void kick_process(struct task_struct *tsk) { }
   1192 #endif
   1193 extern void FASTCALL(sched_fork(struct task_struct * p, int clone_flags));
   1194 extern void FASTCALL(sched_exit(struct task_struct * p));
   1195 
   1196 extern int in_group_p(gid_t);
   1197 extern int in_egroup_p(gid_t);
   1198 
   1199 extern void proc_caches_init(void);
   1200 extern void flush_signals(struct task_struct *);
   1201 extern void flush_signal_handlers(struct task_struct *, int force_default);
   1202 extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
   1203 
   1204 static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
   1205 {
   1206 	unsigned long flags;
   1207 	int ret;
   1208 
   1209 	spin_lock_irqsave(&tsk->sighand->siglock, flags);
   1210 	ret = dequeue_signal(tsk, mask, info);
   1211 	spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
   1212 
   1213 	return ret;
   1214 }
   1215 
   1216 extern void block_all_signals(int (*notifier)(void *priv), void *priv,
   1217 			      sigset_t *mask);
   1218 extern void unblock_all_signals(void);
   1219 extern void release_task(struct task_struct * p);
   1220 extern int send_sig_info(int, struct siginfo *, struct task_struct *);
   1221 extern int send_group_sig_info(int, struct siginfo *, struct task_struct *);
   1222 extern int force_sigsegv(int, struct task_struct *);
   1223 extern int force_sig_info(int, struct siginfo *, struct task_struct *);
   1224 extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp);
   1225 extern int kill_pg_info(int, struct siginfo *, pid_t);
   1226 extern int kill_proc_info(int, struct siginfo *, pid_t);
   1227 extern int kill_proc_info_as_uid(int, struct siginfo *, pid_t, uid_t, uid_t, u32);
   1228 extern void do_notify_parent(struct task_struct *, int);
   1229 extern void force_sig(int, struct task_struct *);
   1230 extern void force_sig_specific(int, struct task_struct *);
   1231 extern int send_sig(int, struct task_struct *, int);
   1232 extern void zap_other_threads(struct task_struct *p);
   1233 extern int kill_pg(pid_t, int, int);
   1234 extern int kill_proc(pid_t, int, int);
   1235 extern struct sigqueue *sigqueue_alloc(void);
   1236 extern void sigqueue_free(struct sigqueue *);
   1237 extern int send_sigqueue(int, struct sigqueue *,  struct task_struct *);
   1238 extern int send_group_sigqueue(int, struct sigqueue *,  struct task_struct *);
   1239 extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
   1240 extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long);
   1241 
   1242 /* These can be the second arg to send_sig_info/send_group_sig_info.  */
   1243 #define SEND_SIG_NOINFO ((struct siginfo *) 0)
   1244 #define SEND_SIG_PRIV	((struct siginfo *) 1)
   1245 #define SEND_SIG_FORCED	((struct siginfo *) 2)
   1246 
   1247 static inline int is_si_special(const struct siginfo *info)
   1248 {
   1249 	return info <= SEND_SIG_FORCED;
   1250 }
   1251 
   1252 /* True if we are on the alternate signal stack.  */
   1253 
   1254 static inline int on_sig_stack(unsigned long sp)
   1255 {
   1256 	return (sp - current->sas_ss_sp < current->sas_ss_size);
   1257 }
   1258 
   1259 static inline int sas_ss_flags(unsigned long sp)
   1260 {
   1261 	return (current->sas_ss_size == 0 ? SS_DISABLE
   1262 		: on_sig_stack(sp) ? SS_ONSTACK : 0);
   1263 }
   1264 
   1265 /*
   1266  * Routines for handling mm_structs
   1267  */
   1268 extern struct mm_struct * mm_alloc(void);
   1269 
   1270 /* mmdrop drops the mm and the page tables */
   1271 extern void FASTCALL(__mmdrop(struct mm_struct *));
   1272 static inline void mmdrop(struct mm_struct * mm)
   1273 {
   1274 	if (atomic_dec_and_test(&mm->mm_count))
   1275 		__mmdrop(mm);
   1276 }
   1277 
   1278 /* mmput gets rid of the mappings and all user-space */
   1279 extern void mmput(struct mm_struct *);
   1280 /* Grab a reference to a task's mm, if it is not already going away */
   1281 extern struct mm_struct *get_task_mm(struct task_struct *task);
   1282 /* Remove the current task's stale references to the old mm_struct */
   1283 extern void mm_release(struct task_struct *, struct mm_struct *);
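        /*
         * Typical (illustrative) pattern for examining another task's address
         * space, as used by procfs-style code:
         *
         *	struct mm_struct *mm = get_task_mm(task);
         *
         *	if (mm) {
         *		down_read(&mm->mmap_sem);
         *		... walk mm->mmap, read the counters, etc. ...
         *		up_read(&mm->mmap_sem);
         *		mmput(mm);
         *	}
         */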
   1284 
   1285 extern int  copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
   1286 extern void flush_thread(void);
   1287 extern void exit_thread(void);
   1288 
   1289 extern void exit_files(struct task_struct *);
   1290 extern void __cleanup_signal(struct signal_struct *);
   1291 extern void __cleanup_sighand(struct sighand_struct *);
   1292 extern void exit_itimers(struct signal_struct *);
   1293 
   1294 extern NORET_TYPE void do_group_exit(int);
   1295 
   1296 extern void daemonize(const char *, ...);
   1297 extern int allow_signal(int);
   1298 extern int disallow_signal(int);
   1299 extern struct task_struct *child_reaper;
   1300 
   1301 extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
   1302 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
   1303 struct task_struct *fork_idle(int);
   1304 
   1305 extern void set_task_comm(struct task_struct *tsk, char *from);
   1306 extern void get_task_comm(char *to, struct task_struct *tsk);
   1307 
   1308 #ifdef CONFIG_SMP
   1309 extern void wait_task_inactive(struct task_struct * p);
   1310 #else
   1311 #define wait_task_inactive(p)	do { } while (0)
   1312 #endif
   1313 
   1314 #define remove_parent(p)	list_del_init(&(p)->sibling)
   1315 #define add_parent(p)		list_add_tail(&(p)->sibling,&(p)->parent->children)
   1316 
   1317 #define next_task(p)	list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks)
   1318 
   1319 #define for_each_process(p) \
   1320 	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
   1321 
   1322 /*
   1323  * Careful: do_each_thread/while_each_thread is a double loop so
   1324  *          'break' will not work as expected - use goto instead.
   1325  */
   1326 #define do_each_thread(g, t) \
   1327 	for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do
   1328 
   1329 #define while_each_thread(g, t) \
   1330 	while ((t = next_thread(t)) != g)
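        /*
         * Example of the goto idiom asked for above (sketch, normally run
         * under read_lock(&tasklist_lock)); a plain 'break' would only leave
         * the inner while_each_thread() loop:
         *
         *	do_each_thread(g, t) {
         *		if (t->mm == mm)
         *			goto found;
         *	} while_each_thread(g, t);
         *	t = NULL;
         * found:
         *	...
         */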
   1331 
   1332 /* de_thread depends on thread_group_leader not being a pid based check */
   1333 #define thread_group_leader(p)	(p == p->group_leader)
   1334 
   1335 static inline struct task_struct *next_thread(const struct task_struct *p)
   1336 {
   1337 	return list_entry(rcu_dereference(p->thread_group.next),
   1338 			  struct task_struct, thread_group);
   1339 }
   1340 
   1341 static inline int thread_group_empty(struct task_struct *p)
   1342 {
   1343 	return list_empty(&p->thread_group);
   1344 }
   1345 
   1346 #define delay_group_leader(p) \
   1347 		(thread_group_leader(p) && !thread_group_empty(p))
   1348 
   1349 /*
   1350  * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
   1351  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
   1352  * pins the final release of task.io_context.  Also protects ->cpuset.
   1353  *
   1354  * Nests both inside and outside of read_lock(&tasklist_lock).
   1355  * It must not be nested with write_lock_irq(&tasklist_lock),
   1356  * neither inside nor outside.
   1357  */
   1358 static inline void task_lock(struct task_struct *p)
   1359 {
   1360 	spin_lock(&p->alloc_lock);
   1361 }
   1362 
   1363 static inline void task_unlock(struct task_struct *p)
   1364 {
   1365 	spin_unlock(&p->alloc_lock);
   1366 }
   1367 
   1368 extern struct sighand_struct *lock_task_sighand(struct task_struct *tsk,
   1369 							unsigned long *flags);
   1370 
   1371 static inline void unlock_task_sighand(struct task_struct *tsk,
   1372 						unsigned long *flags)
   1373 {
   1374 	spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
   1375 }
   1376 
   1377 #ifndef __HAVE_THREAD_FUNCTIONS
   1378 
   1379 #define task_thread_info(task) (task)->thread_info
   1380 #define task_stack_page(task) ((void*)((task)->thread_info))
   1381 
   1382 static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
   1383 {
   1384 	*task_thread_info(p) = *task_thread_info(org);
   1385 	task_thread_info(p)->task = p;
   1386 }
   1387 
   1388 static inline unsigned long *end_of_stack(struct task_struct *p)
   1389 {
   1390 	return (unsigned long *)(p->thread_info + 1);
   1391 }
   1392 
   1393 #endif
   1394 
   1395 /* set thread flags in another task's structure
   1396  * - see asm/thread_info.h for TIF_xxxx flags available
   1397  */
   1398 static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
   1399 {
   1400 	set_ti_thread_flag(task_thread_info(tsk), flag);
   1401 }
   1402 
   1403 static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
   1404 {
   1405 	clear_ti_thread_flag(task_thread_info(tsk), flag);
   1406 }
   1407 
   1408 static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
   1409 {
   1410 	return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
   1411 }
   1412 
   1413 static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
   1414 {
   1415 	return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag);
   1416 }
   1417 
   1418 static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
   1419 {
   1420 	return test_ti_thread_flag(task_thread_info(tsk), flag);
   1421 }
   1422 
   1423 static inline void set_tsk_need_resched(struct task_struct *tsk)
   1424 {
   1425 	set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
   1426 }
   1427 
   1428 static inline void clear_tsk_need_resched(struct task_struct *tsk)
   1429 {
   1430 	clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
   1431 }
   1432 
   1433 static inline int signal_pending(struct task_struct *p)
   1434 {
   1435 	return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
   1436 }
   1437 
   1438 static inline int need_resched(void)
   1439 {
   1440 	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
   1441 }
   1442 
   1443 /*
   1444  * cond_resched() and cond_resched_lock(): latency reduction via
   1445  * explicit rescheduling in places that are safe. The return
   1446  * value indicates whether a reschedule was actually done.
   1447  * cond_resched_lock() will drop the spinlock before scheduling,
   1448  * cond_resched_softirq() will enable bhs before scheduling.
   1449  */
   1450 extern int cond_resched(void);
   1451 extern int cond_resched_lock(spinlock_t * lock);
   1452 extern int cond_resched_softirq(void);
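        /*
         * Illustrative use in a long-running loop (process_one() is a
         * hypothetical helper):
         *
         *	for (i = 0; i < nr_items; i++) {
         *		process_one(items[i]);
         *		cond_resched();
         *	}
         *
         * On a non-preemptible kernel this is often the only point at which
         * a higher-priority task can get onto this CPU.
         */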
   1453 
   1454 /*
   1455  * Does a critical section need to be broken due to another
   1456  * task waiting?:
   1457  */
   1458 #if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
   1459 # define need_lockbreak(lock) ((lock)->break_lock)
   1460 #else
   1461 # define need_lockbreak(lock) 0
   1462 #endif
   1463 
   1464 /*
   1465  * Does a critical section need to be broken due to another
   1466  * task waiting or preemption being signalled:
   1467  */
   1468 static inline int lock_need_resched(spinlock_t *lock)
   1469 {
   1470 	if (need_lockbreak(lock) || need_resched())
   1471 		return 1;
   1472 	return 0;
   1473 }
   1474 
   1475 /* Reevaluate whether the task has signals pending delivery.
   1476    This is required every time the blocked sigset_t changes.
   1477    Callers must hold sighand->siglock.  */
   1478 
   1479 extern FASTCALL(void recalc_sigpending_tsk(struct task_struct *t));
   1480 extern void recalc_sigpending(void);
   1481 
   1482 extern void signal_wake_up(struct task_struct *t, int resume_stopped);
   1483 
   1484 /*
   1485  * Wrappers for p->thread_info->cpu access. No-op on UP.
   1486  */
   1487 #ifdef CONFIG_SMP
   1488 
   1489 static inline unsigned int task_cpu(const struct task_struct *p)
   1490 {
   1491 	return task_thread_info(p)->cpu;
   1492 }
   1493 
   1494 static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
   1495 {
   1496 	task_thread_info(p)->cpu = cpu;
   1497 }
   1498 
   1499 #else
   1500 
   1501 static inline unsigned int task_cpu(const struct task_struct *p)
   1502 {
   1503 	return 0;
   1504 }
   1505 
   1506 static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
   1507 {
   1508 }
   1509 
   1510 #endif /* CONFIG_SMP */
   1511 
   1512 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
   1513 extern void arch_pick_mmap_layout(struct mm_struct *mm);
   1514 #else
   1515 static inline void arch_pick_mmap_layout(struct mm_struct *mm)
   1516 {
   1517 	mm->mmap_base = TASK_UNMAPPED_BASE;
   1518 	mm->get_unmapped_area = arch_get_unmapped_area;
   1519 	mm->unmap_area = arch_unmap_area;
   1520 }
   1521 #endif
   1522 
   1523 extern long sched_setaffinity(pid_t pid, cpumask_t new_mask);
   1524 extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
   1525 
   1526 #include <linux/sysdev.h>
   1527 extern int sched_mc_power_savings, sched_smt_power_savings;
   1528 extern struct sysdev_attribute attr_sched_mc_power_savings, attr_sched_smt_power_savings;
   1529 extern int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls);
   1530 
   1531 extern void normalize_rt_tasks(void);
   1532 
   1533 #ifdef CONFIG_PM
   1534 /*
   1535  * Check if a process has been frozen
   1536  */
   1537 static inline int frozen(struct task_struct *p)
   1538 {
   1539 	return p->flags & PF_FROZEN;
   1540 }
   1541 
   1542 /*
   1543  * Check if there is a request to freeze a process
   1544  */
   1545 static inline int freezing(struct task_struct *p)
   1546 {
   1547 	return p->flags & PF_FREEZE;
   1548 }
   1549 
   1550 /*
   1551  * Request that a process be frozen
   1552  * FIXME: SMP problem. We may not modify another process's flags!
   1553  */
   1554 static inline void freeze(struct task_struct *p)
   1555 {
   1556 	p->flags |= PF_FREEZE;
   1557 }
   1558 
   1559 /*
   1560  * Sometimes we may need to cancel the previous 'freeze' request
   1561  */
   1562 static inline void do_not_freeze(struct task_struct *p)
   1563 {
   1564 	p->flags &= ~PF_FREEZE;
   1565 }
   1566 
   1567 /*
   1568  * Wake up a frozen process
   1569  */
   1570 static inline int thaw_process(struct task_struct *p)
   1571 {
   1572 	if (frozen(p)) {
   1573 		p->flags &= ~PF_FROZEN;
   1574 		wake_up_process(p);
   1575 		return 1;
   1576 	}
   1577 	return 0;
   1578 }
   1579 
   1580 /*
   1581  * freezing is complete, mark process as frozen
   1582  */
   1583 static inline void frozen_process(struct task_struct *p)
   1584 {
   1585 	p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN;
   1586 }
   1587 
   1588 extern void refrigerator(void);
   1589 extern int freeze_processes(void);
   1590 extern void thaw_processes(void);
   1591 
   1592 static inline int try_to_freeze(void)
   1593 {
   1594 	if (freezing(current)) {
   1595 		refrigerator();
   1596 		return 1;
   1597 	} else
   1598 		return 0;
   1599 }
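        /*
         * Freezable kernel threads typically poll the freezer from their main
         * loop (illustrative sketch; do_work() is hypothetical):
         *
         *	while (!kthread_should_stop()) {
         *		try_to_freeze();
         *		do_work();
         *		schedule_timeout_interruptible(HZ);
         *	}
         */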
   1600 #else
   1601 static inline int frozen(struct task_struct *p) { return 0; }
   1602 static inline int freezing(struct task_struct *p) { return 0; }
   1603 static inline void freeze(struct task_struct *p) { BUG(); }
   1604 static inline int thaw_process(struct task_struct *p) { return 1; }
   1605 static inline void frozen_process(struct task_struct *p) { BUG(); }
   1606 
   1607 static inline void refrigerator(void) {}
   1608 static inline int freeze_processes(void) { BUG(); return 0; }
   1609 static inline void thaw_processes(void) {}
   1610 
   1611 static inline int try_to_freeze(void) { return 0; }
   1612 
   1613 #endif /* CONFIG_PM */
   1614 #endif /* __KERNEL__ */
   1615 
   1616 #endif
   1617