diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/auditsc.c | 14 | ||||
-rw-r--r-- | kernel/cgroup.c | 49 | ||||
-rw-r--r-- | kernel/compat.c | 65 | ||||
-rw-r--r-- | kernel/fork.c | 5 | ||||
-rw-r--r-- | kernel/hrtimer.c | 4 | ||||
-rw-r--r-- | kernel/kexec.c | 43 | ||||
-rw-r--r-- | kernel/kmod.c | 98 | ||||
-rw-r--r-- | kernel/kthread.c | 44 | ||||
-rw-r--r-- | kernel/lglock.c | 12 | ||||
-rw-r--r-- | kernel/pid.c | 11 | ||||
-rw-r--r-- | kernel/pid_namespace.c | 2 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 440 | ||||
-rw-r--r-- | kernel/printk.c | 36 | ||||
-rw-r--r-- | kernel/ptrace.c | 81 | ||||
-rw-r--r-- | kernel/range.c | 3 | ||||
-rw-r--r-- | kernel/rcutree.c | 2 | ||||
-rw-r--r-- | kernel/relay.c | 14 | ||||
-rw-r--r-- | kernel/semaphore.c | 8 | ||||
-rw-r--r-- | kernel/signal.c | 6 | ||||
-rw-r--r-- | kernel/smp.c | 102 | ||||
-rw-r--r-- | kernel/sys.c | 221 | ||||
-rw-r--r-- | kernel/sysctl.c | 15 | ||||
-rw-r--r-- | kernel/test_kprobes.c | 2 | ||||
-rw-r--r-- | kernel/time/timer_list.c | 104 | ||||
-rw-r--r-- | kernel/timer.c | 143 | ||||
-rw-r--r-- | kernel/watchdog.c | 10 |
26 files changed, 737 insertions, 797 deletions
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index a371f857a0a9..c68229411a7c 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1034,21 +1034,15 @@ static inline void audit_free_aux(struct audit_context *context) } } -static inline void audit_zero_context(struct audit_context *context, - enum audit_state state) -{ - memset(context, 0, sizeof(*context)); - context->state = state; - context->prio = state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0; -} - static inline struct audit_context *audit_alloc_context(enum audit_state state) { struct audit_context *context; - if (!(context = kmalloc(sizeof(*context), GFP_KERNEL))) + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) return NULL; - audit_zero_context(context, state); + context->state = state; + context->prio = state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0; INIT_LIST_HEAD(&context->killed_trees); INIT_LIST_HEAD(&context->names_list); return context; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index bfc00044f9a9..a790409425bd 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5287,55 +5287,6 @@ struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id) } EXPORT_SYMBOL_GPL(css_lookup); -/** - * css_get_next - lookup next cgroup under specified hierarchy. - * @ss: pointer to subsystem - * @id: current position of iteration. - * @root: pointer to css. search tree under this. - * @foundid: position of found object. - * - * Search next css under the specified hierarchy of rootid. Calling under - * rcu_read_lock() is necessary. Returns NULL if it reaches the end. - */ -struct cgroup_subsys_state * -css_get_next(struct cgroup_subsys *ss, int id, - struct cgroup_subsys_state *root, int *foundid) -{ - struct cgroup_subsys_state *ret = NULL; - struct css_id *tmp; - int tmpid; - int rootid = css_id(root); - int depth = css_depth(root); - - if (!rootid) - return NULL; - - BUG_ON(!ss->use_id); - WARN_ON_ONCE(!rcu_read_lock_held()); - - /* fill start point for scan */ - tmpid = id; - while (1) { - /* - * scan next entry from bitmap(tree), tmpid is updated after - * idr_get_next(). - */ - tmp = idr_get_next(&ss->idr, &tmpid); - if (!tmp) - break; - if (tmp->depth >= depth && tmp->stack[depth] == rootid) { - ret = rcu_dereference(tmp->css); - if (ret) { - *foundid = tmpid; - break; - } - } - /* continue to scan from next id */ - tmpid = tmpid + 1; - } - return ret; -} - /* * get corresponding css from file open on cgroupfs directory */ diff --git a/kernel/compat.c b/kernel/compat.c index c5620d6435e0..0a09e481b70b 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -1119,71 +1119,6 @@ asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, } #endif -struct compat_sysinfo { - s32 uptime; - u32 loads[3]; - u32 totalram; - u32 freeram; - u32 sharedram; - u32 bufferram; - u32 totalswap; - u32 freeswap; - u16 procs; - u16 pad; - u32 totalhigh; - u32 freehigh; - u32 mem_unit; - char _f[20-2*sizeof(u32)-sizeof(int)]; -}; - -asmlinkage long -compat_sys_sysinfo(struct compat_sysinfo __user *info) -{ - struct sysinfo s; - - do_sysinfo(&s); - - /* Check to see if any memory value is too large for 32-bit and scale - * down if needed - */ - if ((s.totalram >> 32) || (s.totalswap >> 32)) { - int bitcount = 0; - - while (s.mem_unit < PAGE_SIZE) { - s.mem_unit <<= 1; - bitcount++; - } - - s.totalram >>= bitcount; - s.freeram >>= bitcount; - s.sharedram >>= bitcount; - s.bufferram >>= bitcount; - s.totalswap >>= bitcount; - s.freeswap >>= bitcount; - s.totalhigh >>= bitcount; - s.freehigh >>= bitcount; - } - - if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) || - __put_user (s.uptime, &info->uptime) || - __put_user (s.loads[0], &info->loads[0]) || - __put_user (s.loads[1], &info->loads[1]) || - __put_user (s.loads[2], &info->loads[2]) || - __put_user (s.totalram, &info->totalram) || - __put_user (s.freeram, &info->freeram) || - __put_user (s.sharedram, &info->sharedram) || - __put_user (s.bufferram, &info->bufferram) || - __put_user (s.totalswap, &info->totalswap) || - __put_user (s.freeswap, &info->freeswap) || - __put_user (s.procs, &info->procs) || - __put_user (s.totalhigh, &info->totalhigh) || - __put_user (s.freehigh, &info->freehigh) || - __put_user (s.mem_unit, &info->mem_unit)) - return -EFAULT; - - return 0; -} - COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval, compat_pid_t, pid, struct compat_timespec __user *, interval) diff --git a/kernel/fork.c b/kernel/fork.c index 0519c9b3261c..6fcc2e24561e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -70,6 +70,7 @@ #include <linux/khugepaged.h> #include <linux/signalfd.h> #include <linux/uprobes.h> +#include <linux/aio.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -364,8 +365,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) mm->locked_vm = 0; mm->mmap = NULL; mm->mmap_cache = NULL; - mm->free_area_cache = oldmm->mmap_base; - mm->cached_hole_size = ~0UL; mm->map_count = 0; cpumask_clear(mm_cpumask(mm)); mm->mm_rb = RB_ROOT; @@ -539,8 +538,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) mm->nr_ptes = 0; memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); spin_lock_init(&mm->page_table_lock); - mm->free_area_cache = TASK_UNMAPPED_BASE; - mm->cached_hole_size = ~0UL; mm_init_aio(mm); mm_init_owner(mm, p); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index fd4b13b131f8..b963bea3d37a 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -315,6 +315,10 @@ ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec) } else { unsigned long rem = do_div(nsec, NSEC_PER_SEC); + /* Make sure nsec fits into long */ + if (unlikely(nsec > KTIME_SEC_MAX)) + return (ktime_t){ .tv64 = KTIME_MAX }; + tmp = ktime_set((long)nsec, rem); } diff --git a/kernel/kexec.c b/kernel/kexec.c index bddd3d7a74b6..38f5fab4d5eb 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -786,7 +786,7 @@ static int kimage_load_normal_segment(struct kimage *image, struct kexec_segment *segment) { unsigned long maddr; - unsigned long ubytes, mbytes; + size_t ubytes, mbytes; int result; unsigned char __user *buf; @@ -819,13 +819,9 @@ static int kimage_load_normal_segment(struct kimage *image, /* Start with a clear page */ clear_page(ptr); ptr += maddr & ~PAGE_MASK; - mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); - if (mchunk > mbytes) - mchunk = mbytes; - - uchunk = mchunk; - if (uchunk > ubytes) - uchunk = ubytes; + mchunk = min_t(size_t, mbytes, + PAGE_SIZE - (maddr & ~PAGE_MASK)); + uchunk = min(ubytes, mchunk); result = copy_from_user(ptr, buf, uchunk); kunmap(page); @@ -850,7 +846,7 @@ static int kimage_load_crash_segment(struct kimage *image, * We do things a page at a time for the sake of kmap. */ unsigned long maddr; - unsigned long ubytes, mbytes; + size_t ubytes, mbytes; int result; unsigned char __user *buf; @@ -871,13 +867,10 @@ static int kimage_load_crash_segment(struct kimage *image, } ptr = kmap(page); ptr += maddr & ~PAGE_MASK; - mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); - if (mchunk > mbytes) - mchunk = mbytes; - - uchunk = mchunk; - if (uchunk > ubytes) { - uchunk = ubytes; + mchunk = min_t(size_t, mbytes, + PAGE_SIZE - (maddr & ~PAGE_MASK)); + uchunk = min(ubytes, mchunk); + if (mchunk > uchunk) { /* Zero the trailing part of the page */ memset(ptr + uchunk, 0, mchunk - uchunk); } @@ -1118,12 +1111,8 @@ void __weak crash_free_reserved_phys_range(unsigned long begin, { unsigned long addr; - for (addr = begin; addr < end; addr += PAGE_SIZE) { - ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT)); - init_page_count(pfn_to_page(addr >> PAGE_SHIFT)); - free_page((unsigned long)__va(addr)); - totalram_pages++; - } + for (addr = begin; addr < end; addr += PAGE_SIZE) + free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); } int crash_shrink_memory(unsigned long new_size) @@ -1452,14 +1441,13 @@ void vmcoreinfo_append_str(const char *fmt, ...) { va_list args; char buf[0x50]; - int r; + size_t r; va_start(args, fmt); r = vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); - if (r + vmcoreinfo_size > vmcoreinfo_max_size) - r = vmcoreinfo_max_size - vmcoreinfo_size; + r = min(r, vmcoreinfo_max_size - vmcoreinfo_size); memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); @@ -1489,7 +1477,7 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_SYMBOL(swapper_pg_dir); #endif VMCOREINFO_SYMBOL(_stext); - VMCOREINFO_SYMBOL(vmlist); + VMCOREINFO_SYMBOL(vmap_area_list); #ifndef CONFIG_NEED_MULTIPLE_NODES VMCOREINFO_SYMBOL(mem_map); @@ -1527,7 +1515,8 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_OFFSET(free_area, free_list); VMCOREINFO_OFFSET(list_head, next); VMCOREINFO_OFFSET(list_head, prev); - VMCOREINFO_OFFSET(vm_struct, addr); + VMCOREINFO_OFFSET(vmap_area, va_start); + VMCOREINFO_OFFSET(vmap_area, list); VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); log_buf_kexec_setup(); VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); diff --git a/kernel/kmod.c b/kernel/kmod.c index 56dd34976d7b..1296e72e4161 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -77,6 +77,7 @@ static void free_modprobe_argv(struct subprocess_info *info) static int call_modprobe(char *module_name, int wait) { + struct subprocess_info *info; static char *envp[] = { "HOME=/", "TERM=linux", @@ -98,8 +99,15 @@ static int call_modprobe(char *module_name, int wait) argv[3] = module_name; /* check free_modprobe_argv() */ argv[4] = NULL; - return call_usermodehelper_fns(modprobe_path, argv, envp, - wait | UMH_KILLABLE, NULL, free_modprobe_argv, NULL); + info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL, + NULL, free_modprobe_argv, NULL); + if (!info) + goto free_module_name; + + return call_usermodehelper_exec(info, wait | UMH_KILLABLE); + +free_module_name: + kfree(module_name); free_argv: kfree(argv); out: @@ -502,14 +510,28 @@ static void helper_unlock(void) * @argv: arg vector for process * @envp: environment for process * @gfp_mask: gfp mask for memory allocation + * @cleanup: a cleanup function + * @init: an init function + * @data: arbitrary context sensitive data * * Returns either %NULL on allocation failure, or a subprocess_info * structure. This should be passed to call_usermodehelper_exec to * exec the process and free the structure. + * + * The init function is used to customize the helper process prior to + * exec. A non-zero return code causes the process to error out, exit, + * and return the failure to the calling process + * + * The cleanup function is just before ethe subprocess_info is about to + * be freed. This can be used for freeing the argv and envp. The + * Function must be runnable in either a process context or the + * context in which call_usermodehelper_exec is called. */ -static struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, - char **envp, gfp_t gfp_mask) + char **envp, gfp_t gfp_mask, + int (*init)(struct subprocess_info *info, struct cred *new), + void (*cleanup)(struct subprocess_info *info), + void *data) { struct subprocess_info *sub_info; sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask); @@ -520,50 +542,27 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, sub_info->path = path; sub_info->argv = argv; sub_info->envp = envp; + + sub_info->cleanup = cleanup; + sub_info->init = init; + sub_info->data = data; out: return sub_info; } - -/** - * call_usermodehelper_setfns - set a cleanup/init function - * @info: a subprocess_info returned by call_usermodehelper_setup - * @cleanup: a cleanup function - * @init: an init function - * @data: arbitrary context sensitive data - * - * The init function is used to customize the helper process prior to - * exec. A non-zero return code causes the process to error out, exit, - * and return the failure to the calling process - * - * The cleanup function is just before ethe subprocess_info is about to - * be freed. This can be used for freeing the argv and envp. The - * Function must be runnable in either a process context or the - * context in which call_usermodehelper_exec is called. - */ -static -void call_usermodehelper_setfns(struct subprocess_info *info, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *info), - void *data) -{ - info->cleanup = cleanup; - info->init = init; - info->data = data; -} +EXPORT_SYMBOL(call_usermodehelper_setup); /** * call_usermodehelper_exec - start a usermode application * @sub_info: information about the subprocessa * @wait: wait for the application to finish and return status. - * when -1 don't wait at all, but you get no useful error back when - * the program couldn't be exec'ed. This makes it safe to call + * when UMH_NO_WAIT don't wait at all, but you get no useful error back + * when the program couldn't be exec'ed. This makes it safe to call * from interrupt context. * * Runs a user-space application. The application is started * asynchronously if wait is not set, and runs as a child of keventd. * (ie. it runs with full root capabilities). */ -static int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) { DECLARE_COMPLETION_ONSTACK(done); @@ -615,31 +614,34 @@ unlock: helper_unlock(); return retval; } +EXPORT_SYMBOL(call_usermodehelper_exec); -/* - * call_usermodehelper_fns() will not run the caller-provided cleanup function - * if a memory allocation failure is experienced. So the caller might need to - * check the call_usermodehelper_fns() return value: if it is -ENOMEM, perform - * the necessaary cleanup within the caller. +/** + * call_usermodehelper() - prepare and start a usermode application + * @path: path to usermode executable + * @argv: arg vector for process + * @envp: environment for process + * @wait: wait for the application to finish and return status. + * when UMH_NO_WAIT don't wait at all, but you get no useful error back + * when the program couldn't be exec'ed. This makes it safe to call + * from interrupt context. + * + * This function is the equivalent to use call_usermodehelper_setup() and + * call_usermodehelper_exec(). */ -int call_usermodehelper_fns( - char *path, char **argv, char **envp, int wait, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *), void *data) +int call_usermodehelper(char *path, char **argv, char **envp, int wait) { struct subprocess_info *info; gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL; - info = call_usermodehelper_setup(path, argv, envp, gfp_mask); - + info = call_usermodehelper_setup(path, argv, envp, gfp_mask, + NULL, NULL, NULL); if (info == NULL) return -ENOMEM; - call_usermodehelper_setfns(info, init, cleanup, data); - return call_usermodehelper_exec(info, wait); } -EXPORT_SYMBOL(call_usermodehelper_fns); +EXPORT_SYMBOL(call_usermodehelper); static int proc_cap_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) diff --git a/kernel/kthread.c b/kernel/kthread.c index a2fbbb782bad..b9db231032f4 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -52,8 +52,21 @@ enum KTHREAD_BITS { KTHREAD_IS_PARKED, }; -#define to_kthread(tsk) \ - container_of((tsk)->vfork_done, struct kthread, exited) +#define __to_kthread(vfork) \ + container_of(vfork, struct kthread, exited) + +static inline struct kthread *to_kthread(struct task_struct *k) +{ + return __to_kthread(k->vfork_done); +} + +static struct kthread *to_live_kthread(struct task_struct *k) +{ + struct completion *vfork = ACCESS_ONCE(k->vfork_done); + if (likely(vfork)) + return __to_kthread(vfork); + return NULL; +} /** * kthread_should_stop - should this kthread return now? @@ -311,19 +324,6 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), return p; } -static struct kthread *task_get_live_kthread(struct task_struct *k) -{ - struct kthread *kthread; - - get_task_struct(k); - kthread = to_kthread(k); - /* It might have exited */ - barrier(); - if (k->vfork_done != NULL) - return kthread; - return NULL; -} - /** * kthread_unpark - unpark a thread created by kthread_create(). * @k: thread created by kthread_create(). @@ -334,7 +334,7 @@ static struct kthread *task_get_live_kthread(struct task_struct *k) */ void kthread_unpark(struct task_struct *k) { - struct kthread *kthread = task_get_live_kthread(k); + struct kthread *kthread = to_live_kthread(k); if (kthread) { clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); @@ -350,7 +350,6 @@ void kthread_unpark(struct task_struct *k) wake_up_process(k); } } - put_task_struct(k); } /** @@ -367,7 +366,7 @@ void kthread_unpark(struct task_struct *k) */ int kthread_park(struct task_struct *k) { - struct kthread *kthread = task_get_live_kthread(k); + struct kthread *kthread = to_live_kthread(k); int ret = -ENOSYS; if (kthread) { @@ -380,7 +379,6 @@ int kthread_park(struct task_struct *k) } ret = 0; } - put_task_struct(k); return ret; } @@ -401,10 +399,13 @@ int kthread_park(struct task_struct *k) */ int kthread_stop(struct task_struct *k) { - struct kthread *kthread = task_get_live_kthread(k); + struct kthread *kthread; int ret; trace_sched_kthread_stop(k); + + get_task_struct(k); + kthread = to_live_kthread(k); if (kthread) { set_bit(KTHREAD_SHOULD_STOP, &kthread->flags); clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); @@ -412,10 +413,9 @@ int kthread_stop(struct task_struct *k) wait_for_completion(&kthread->exited); } ret = k->exit_code; - put_task_struct(k); - trace_sched_kthread_stop_ret(ret); + trace_sched_kthread_stop_ret(ret); return ret; } EXPORT_SYMBOL(kthread_stop); diff --git a/kernel/lglock.c b/kernel/lglock.c index 6535a667a5a7..86ae2aebf004 100644 --- a/kernel/lglock.c +++ b/kernel/lglock.c @@ -21,7 +21,7 @@ void lg_local_lock(struct lglock *lg) arch_spinlock_t *lock; preempt_disable(); - rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); lock = this_cpu_ptr(lg->lock); arch_spin_lock(lock); } @@ -31,7 +31,7 @@ void lg_local_unlock(struct lglock *lg) { arch_spinlock_t *lock; - rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock_release(&lg->lock_dep_map, 1, _RET_IP_); lock = this_cpu_ptr(lg->lock); arch_spin_unlock(lock); preempt_enable(); @@ -43,7 +43,7 @@ void lg_local_lock_cpu(struct lglock *lg, int cpu) arch_spinlock_t *lock; preempt_disable(); - rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); lock = per_cpu_ptr(lg->lock, cpu); arch_spin_lock(lock); } @@ -53,7 +53,7 @@ void lg_local_unlock_cpu(struct lglock *lg, int cpu) { arch_spinlock_t *lock; - rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock_release(&lg->lock_dep_map, 1, _RET_IP_); lock = per_cpu_ptr(lg->lock, cpu); arch_spin_unlock(lock); preempt_enable(); @@ -65,7 +65,7 @@ void lg_global_lock(struct lglock *lg) int i; preempt_disable(); - rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); for_each_possible_cpu(i) { arch_spinlock_t *lock; lock = per_cpu_ptr(lg->lock, i); @@ -78,7 +78,7 @@ void lg_global_unlock(struct lglock *lg) { int i; - rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock_release(&lg->lock_dep_map, 1, _RET_IP_); for_each_possible_cpu(i) { arch_spinlock_t *lock; lock = per_cpu_ptr(lg->lock, i); diff --git a/kernel/pid.c b/kernel/pid.c index 047dc6264638..6283d6412aff 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -51,9 +51,6 @@ int pid_max = PID_MAX_DEFAULT; int pid_max_min = RESERVED_PIDS + 1; int pid_max_max = PID_MAX_LIMIT; -#define BITS_PER_PAGE (PAGE_SIZE*8) -#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) - static inline int mk_pid(struct pid_namespace *pid_ns, struct pidmap *map, int off) { @@ -183,15 +180,19 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) break; } if (likely(atomic_read(&map->nr_free))) { - do { + for ( ; ; ) { if (!test_and_set_bit(offset, map->page)) { atomic_dec(&map->nr_free); set_last_pid(pid_ns, last, pid); return pid; } offset = find_next_offset(map, offset); + if (offset >= BITS_PER_PAGE) + break; pid = mk_pid(pid_ns, map, offset); - } while (offset < BITS_PER_PAGE && pid < pid_max); + if (pid >= pid_max) + break; + } } if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) { ++map; diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index bea15bdf82b0..69473c4a653f 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -19,8 +19,6 @@ #include <linux/reboot.h> #include <linux/export.h> -#define BITS_PER_PAGE (PAGE_SIZE*8) - struct pid_cache { int nr_ids; char name[16]; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 8fd709c9bb58..2388062ad14c 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -49,59 +49,28 @@ static int check_clock(const clockid_t which_clock) return error; } -static inline union cpu_time_count +static inline unsigned long long timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) { - union cpu_time_count ret; - ret.sched = 0; /* high half always zero when .cpu used */ + unsigned long long ret; + + ret = 0; /* high half always zero when .cpu used */ if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { - ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; + ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; } else { - ret.cpu = timespec_to_cputime(tp); + ret = cputime_to_expires(timespec_to_cputime(tp)); } return ret; } static void sample_to_timespec(const clockid_t which_clock, - union cpu_time_count cpu, + unsigned long long expires, struct timespec *tp) { if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) - *tp = ns_to_timespec(cpu.sched); + *tp = ns_to_timespec(expires); else - cputime_to_timespec(cpu.cpu, tp); -} - -static inline int cpu_time_before(const clockid_t which_clock, - union cpu_time_count now, - union cpu_time_count then) -{ - if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { - return now.sched < then.sched; - } else { - return now.cpu < then.cpu; - } -} -static inline void cpu_time_add(const clockid_t which_clock, - union cpu_time_count *acc, - union cpu_time_count val) -{ - if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { - acc->sched += val.sched; - } else { - acc->cpu += val.cpu; - } -} -static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, - union cpu_time_count a, - union cpu_time_count b) -{ - if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { - a.sched -= b.sched; - } else { - a.cpu -= b.cpu; - } - return a; + cputime_to_timespec((__force cputime_t)expires, tp); } /* @@ -109,65 +78,49 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, * given the current clock sample. */ static void bump_cpu_timer(struct k_itimer *timer, - union cpu_time_count now) + unsigned long long now) { int i; + unsigned long long delta, incr; - if (timer->it.cpu.incr.sched == 0) + if (timer->it.cpu.incr == 0) return; - if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { - unsigned long long delta, incr; + if (now < timer->it.cpu.expires) + return; - if (now.sched < timer->it.cpu.expires.sched) - return; - incr = timer->it.cpu.incr.sched; - delta = now.sched + incr - timer->it.cpu.expires.sched; - /* Don't use (incr*2 < delta), incr*2 might overflow. */ - for (i = 0; incr < delta - incr; i++) - incr = incr << 1; - for (; i >= 0; incr >>= 1, i--) { - if (delta < incr) - continue; - timer->it.cpu.expires.sched += incr; - timer->it_overrun += 1 << i; - delta -= incr; - } - } else { - cputime_t delta, incr; + incr = timer->it.cpu.incr; + delta = now + incr - timer->it.cpu.expires; - if (now.cpu < timer->it.cpu.expires.cpu) - return; - incr = timer->it.cpu.incr.cpu; - delta = now.cpu + incr - timer->it.cpu.expires.cpu; - /* Don't use (incr*2 < delta), incr*2 might overflow. */ - for (i = 0; incr < delta - incr; i++) - incr += incr; - for (; i >= 0; incr = incr >> 1, i--) { - if (delta < incr) - continue; - timer->it.cpu.expires.cpu += incr; - timer->it_overrun += 1 << i; - delta -= incr; - } + /* Don't use (incr*2 < delta), incr*2 might overflow. */ + for (i = 0; incr < delta - incr; i++) + incr = incr << 1; + + for (; i >= 0; incr >>= 1, i--) { + if (delta < incr) + continue; + + timer->it.cpu.expires += incr; + timer->it_overrun += 1 << i; + delta -= incr; } } -static inline cputime_t prof_ticks(struct task_struct *p) +static inline unsigned long long prof_ticks(struct task_struct *p) { cputime_t utime, stime; task_cputime(p, &utime, &stime); - return utime + stime; + return cputime_to_expires(utime + stime); } -static inline cputime_t virt_ticks(struct task_struct *p) +static inline unsigned long long virt_ticks(struct task_struct *p) { cputime_t utime; task_cputime(p, &utime, NULL); - return utime; + return cputime_to_expires(utime); } static int @@ -208,19 +161,19 @@ posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) * Sample a per-thread clock for the given task. */ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, - union cpu_time_count *cpu) + unsigned long long *sample) { switch (CPUCLOCK_WHICH(which_clock)) { default: return -EINVAL; case CPUCLOCK_PROF: - cpu->cpu = prof_ticks(p); + *sample = prof_ticks(p); break; case CPUCLOCK_VIRT: - cpu->cpu = virt_ticks(p); + *sample = virt_ticks(p); break; case CPUCLOCK_SCHED: - cpu->sched = task_sched_runtime(p); + *sample = task_sched_runtime(p); break; } return 0; @@ -267,7 +220,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) */ static int cpu_clock_sample_group(const clockid_t which_clock, struct task_struct *p, - union cpu_time_count *cpu) + unsigned long long *sample) { struct task_cputime cputime; @@ -276,15 +229,15 @@ static int cpu_clock_sample_group(const clockid_t which_clock, return -EINVAL; case CPUCLOCK_PROF: thread_group_cputime(p, &cputime); - cpu->cpu = cputime.utime + cputime.stime; + *sample = cputime_to_expires(cputime.utime + cputime.stime); break; case CPUCLOCK_VIRT: thread_group_cputime(p, &cputime); - cpu->cpu = cputime.utime; + *sample = cputime_to_expires(cputime.utime); break; case CPUCLOCK_SCHED: thread_group_cputime(p, &cputime); - cpu->sched = cputime.sum_exec_runtime; + *sample = cputime.sum_exec_runtime; break; } return 0; @@ -295,7 +248,7 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) { const pid_t pid = CPUCLOCK_PID(which_clock); int error = -EINVAL; - union cpu_time_count rtn; + unsigned long long rtn; if (pid == 0) { /* @@ -429,6 +382,15 @@ static int posix_cpu_timer_del(struct k_itimer *timer) return ret; } +static void cleanup_timers_list(struct list_head *head, + unsigned long long curr) +{ + struct cpu_timer_list *timer, *next; + + list_for_each_entry_safe(timer, next, head, entry) + list_del_init(&timer->entry); +} + /* * Clean out CPU timers still ticking when a thread exited. The task * pointer is cleared, and the expiry time is replaced with the residual @@ -439,37 +401,12 @@ static void cleanup_timers(struct list_head *head, cputime_t utime, cputime_t stime, unsigned long long sum_exec_runtime) { - struct cpu_timer_list *timer, *next; - cputime_t ptime = utime + stime; - list_for_each_entry_safe(timer, next, head, entry) { - list_del_init(&timer->entry); - if (timer->expires.cpu < ptime) { - timer->expires.cpu = 0; - } else { - timer->expires.cpu -= ptime; - } - } - - ++head; - list_for_each_entry_safe(timer, next, head, entry) { - list_del_init(&timer->entry); - if (timer->expires.cpu < utime) { - timer->expires.cpu = 0; - } else { - timer->expires.cpu -= utime; - } - } + cputime_t ptime = utime + stime; - ++head; - list_for_each_entry_safe(timer, next, head, entry) { - list_del_init(&timer->entry); - if (timer->expires.sched < sum_exec_runtime) { - timer->expires.sched = 0; - } else { - timer->expires.sched -= sum_exec_runtime; - } - } + cleanup_timers_list(head, cputime_to_expires(ptime)); + cleanup_timers_list(++head, cputime_to_expires(utime)); + cleanup_timers_list(++head, sum_exec_runtime); } /* @@ -499,19 +436,6 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk) tsk->se.sum_exec_runtime + sig->sum_sched_runtime); } -static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) -{ - /* - * That's all for this thread or process. - * We leave our residual in expires to be reported. - */ - put_task_struct(timer->it.cpu.task); - timer->it.cpu.task = NULL; - timer->it.cpu.expires = cpu_time_sub(timer->it_clock, - timer->it.cpu.expires, - now); -} - static inline int expires_gt(cputime_t expires, cputime_t new_exp) { return expires == 0 || expires > new_exp; @@ -541,14 +465,14 @@ static void arm_timer(struct k_itimer *timer) listpos = head; list_for_each_entry(next, head, entry) { - if (cpu_time_before(timer->it_clock, nt->expires, next->expires)) + if (nt->expires < next->expires) break; listpos = &next->entry; } list_add(&nt->entry, listpos); if (listpos == head) { - union cpu_time_count *exp = &nt->expires; + unsigned long long exp = nt->expires; /* * We are the new earliest-expiring POSIX 1.b timer, hence @@ -559,17 +483,17 @@ static void arm_timer(struct k_itimer *timer) switch (CPUCLOCK_WHICH(timer->it_clock)) { case CPUCLOCK_PROF: - if (expires_gt(cputime_expires->prof_exp, exp->cpu)) - cputime_expires->prof_exp = exp->cpu; + if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp))) + cputime_expires->prof_exp = expires_to_cputime(exp); break; case CPUCLOCK_VIRT: - if (expires_gt(cputime_expires->virt_exp, exp->cpu)) - cputime_expires->virt_exp = exp->cpu; + if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp))) + cputime_expires->virt_exp = expires_to_cputime(exp); break; case CPUCLOCK_SCHED: if (cputime_expires->sched_exp == 0 || - cputime_expires->sched_exp > exp->sched) - cputime_expires->sched_exp = exp->sched; + cputime_expires->sched_exp > exp) + cputime_expires->sched_exp = exp; break; } } @@ -584,20 +508,20 @@ static void cpu_timer_fire(struct k_itimer *timer) /* * User don't want any signal. */ - timer->it.cpu.expires.sched = 0; + timer->it.cpu.expires = 0; } else if (unlikely(timer->sigq == NULL)) { /* * This a special case for clock_nanosleep, * not a normal timer from sys_timer_create. */ wake_up_process(timer->it_process); - timer->it.cpu.expires.sched = 0; - } else if (timer->it.cpu.incr.sched == 0) { + timer->it.cpu.expires = 0; + } else if (timer->it.cpu.incr == 0) { /* * One-shot timer. Clear it as soon as it's fired. */ posix_timer_event(timer, 0); - timer->it.cpu.expires.sched = 0; + timer->it.cpu.expires = 0; } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { /* * The signal did not get queued because the signal @@ -615,7 +539,7 @@ static void cpu_timer_fire(struct k_itimer *timer) */ static int cpu_timer_sample_group(const clockid_t which_clock, struct task_struct *p, - union cpu_time_count *cpu) + unsigned long long *sample) { struct task_cputime cputime; @@ -624,13 +548,13 @@ static int cpu_timer_sample_group(const clockid_t which_clock, default: return -EINVAL; case CPUCLOCK_PROF: - cpu->cpu = cputime.utime + cputime.stime; + *sample = cputime_to_expires(cputime.utime + cputime.stime); break; case CPUCLOCK_VIRT: - cpu->cpu = cputime.utime; + *sample = cputime_to_expires(cputime.utime); break; case CPUCLOCK_SCHED: - cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); + *sample = cputime.sum_exec_runtime + task_delta_exec(p); break; } return 0; @@ -646,7 +570,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, struct itimerspec *new, struct itimerspec *old) { struct task_struct *p = timer->it.cpu.task; - union cpu_time_count old_expires, new_expires, old_incr, val; + unsigned long long old_expires, new_expires, old_incr, val; int ret; if (unlikely(p == NULL)) { @@ -701,7 +625,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, } if (old) { - if (old_expires.sched == 0) { + if (old_expires == 0) { old->it_value.tv_sec = 0; old->it_value.tv_nsec = 0; } else { @@ -716,11 +640,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, * new setting. */ bump_cpu_timer(timer, val); - if (cpu_time_before(timer->it_clock, val, - timer->it.cpu.expires)) { - old_expires = cpu_time_sub( - timer->it_clock, - timer->it.cpu.expires, val); + if (val < timer->it.cpu.expires) { + old_expires = timer->it.cpu.expires - val; sample_to_timespec(timer->it_clock, old_expires, &old->it_value); @@ -743,8 +664,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, goto out; } - if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) { - cpu_time_add(timer->it_clock, &new_expires, val); + if (new_expires != 0 && !(flags & TIMER_ABSTIME)) { + new_expires += val; } /* @@ -753,8 +674,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, * arm the timer (we'll just fake it for timer_gettime). */ timer->it.cpu.expires = new_expires; - if (new_expires.sched != 0 && - cpu_time_before(timer->it_clock, val, new_expires)) { + if (new_expires != 0 && val < new_expires) { arm_timer(timer); } @@ -778,8 +698,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, timer->it_overrun_last = 0; timer->it_overrun = -1; - if (new_expires.sched != 0 && - !cpu_time_before(timer->it_clock, val, new_expires)) { + if (new_expires != 0 && !(val < new_expires)) { /* * The designated time already passed, so we notify * immediately, even if the thread never runs to @@ -799,9 +718,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) { - union cpu_time_count now; + unsigned long long now; struct task_struct *p = timer->it.cpu.task; - int clear_dead; /* * Easy part: convert the reload time. @@ -809,28 +727,16 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) sample_to_timespec(timer->it_clock, timer->it.cpu.incr, &itp->it_interval); - if (timer->it.cpu.expires.sched == 0) { /* Timer not armed at all. */ + if (timer->it.cpu.expires == 0) { /* Timer not armed at all. */ itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; return; } - if (unlikely(p == NULL)) { - /* - * This task already died and the timer will never fire. - * In this case, expires is actually the dead value. - */ - dead: - sample_to_timespec(timer->it_clock, timer->it.cpu.expires, - &itp->it_value); - return; - } - /* * Sample the clock to take the difference with the expiry time. */ if (CPUCLOCK_PERTHREAD(timer->it_clock)) { cpu_clock_sample(timer->it_clock, p, &now); - clear_dead = p->exit_state; } else { read_lock(&tasklist_lock); if (unlikely(p->sighand == NULL)) { @@ -839,33 +745,19 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) * We can't even collect a sample any more. * Call the timer disarmed, nothing else to do. */ - put_task_struct(p); - timer->it.cpu.task = NULL; - timer->it.cpu.expires.sched = 0; + timer->it.cpu.expires = 0; + itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; read_unlock(&tasklist_lock); - goto dead; + return; } else { cpu_timer_sample_group(timer->it_clock, p, &now); - clear_dead = (unlikely(p->exit_state) && - thread_group_empty(p)); } read_unlock(&tasklist_lock); } - if (unlikely(clear_dead)) { - /* - * We've noticed that the thread is dead, but - * not yet reaped. Take this opportunity to - * drop our task ref. - */ - clear_dead_task(timer, now); - goto dead; - } - - if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) { + if (now < timer->it.cpu.expires) { sample_to_timespec(timer->it_clock, - cpu_time_sub(timer->it_clock, - timer->it.cpu.expires, now), + timer->it.cpu.expires - now, &itp->it_value); } else { /* @@ -877,6 +769,28 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) } } +static unsigned long long +check_timers_list(struct list_head *timers, + struct list_head *firing, + unsigned long long curr) +{ + int maxfire = 20; + + while (!list_empty(timers)) { + struct cpu_timer_list *t; + + t = list_first_entry(timers, struct cpu_timer_list, entry); + + if (!--maxfire || curr < t->expires) + return t->expires; + + t->firing = 1; + list_move_tail(&t->entry, firing); + } + + return 0; +} + /* * Check for any per-thread CPU timers that have fired and move them off * the tsk->cpu_timers[N] list onto the firing list. Here we update the @@ -885,54 +799,20 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) static void check_thread_timers(struct task_struct *tsk, struct list_head *firing) { - int maxfire; struct list_head *timers = tsk->cpu_timers; struct signal_struct *const sig = tsk->signal; + struct task_cputime *tsk_expires = &tsk->cputime_expires; + unsigned long long expires; unsigned long soft; - maxfire = 20; - tsk->cputime_expires.prof_exp = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *t = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || prof_ticks(tsk) < t->expires.cpu) { - tsk->cputime_expires.prof_exp = t->expires.cpu; - break; - } - t->firing = 1; - list_move_tail(&t->entry, firing); - } + expires = check_timers_list(timers, firing, prof_ticks(tsk)); + tsk_expires->prof_exp = expires_to_cputime(expires); - ++timers; - maxfire = 20; - tsk->cputime_expires.virt_exp = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *t = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || virt_ticks(tsk) < t->expires.cpu) { - tsk->cputime_expires.virt_exp = t->expires.cpu; - break; - } - t->firing = 1; - list_move_tail(&t->entry, firing); - } + expires = check_timers_list(++timers, firing, virt_ticks(tsk)); + tsk_expires->virt_exp = expires_to_cputime(expires); - ++timers; - maxfire = 20; - tsk->cputime_expires.sched_exp = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *t = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { - tsk->cputime_expires.sched_exp = t->expires.sched; - break; - } - t->firing = 1; - list_move_tail(&t->entry, firing); - } + tsk_expires->sched_exp = check_timers_list(++timers, firing, + tsk->se.sum_exec_runtime); /* * Check for the special case thread timers. @@ -980,7 +860,8 @@ static void stop_process_timers(struct signal_struct *sig) static u32 onecputick; static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, - cputime_t *expires, cputime_t cur_time, int signo) + unsigned long long *expires, + unsigned long long cur_time, int signo) { if (!it->expires) return; @@ -1031,9 +912,8 @@ static inline int task_cputime_zero(const struct task_cputime *cputime) static void check_process_timers(struct task_struct *tsk, struct list_head *firing) { - int maxfire; struct signal_struct *const sig = tsk->signal; - cputime_t utime, ptime, virt_expires, prof_expires; + unsigned long long utime, ptime, virt_expires, prof_expires; unsigned long long sum_sched_runtime, sched_expires; struct list_head *timers = sig->cpu_timers; struct task_cputime cputime; @@ -1043,52 +923,13 @@ static void check_process_timers(struct task_struct *tsk, * Collect the current process totals. */ thread_group_cputimer(tsk, &cputime); - utime = cputime.utime; - ptime = utime + cputime.stime; + utime = cputime_to_expires(cputime.utime); + ptime = utime + cputime_to_expires(cputime.stime); sum_sched_runtime = cputime.sum_exec_runtime; - maxfire = 20; - prof_expires = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *tl = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || ptime < tl->expires.cpu) { - prof_expires = tl->expires.cpu; - break; - } - tl->firing = 1; - list_move_tail(&tl->entry, firing); - } - - ++timers; - maxfire = 20; - virt_expires = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *tl = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || utime < tl->expires.cpu) { - virt_expires = tl->expires.cpu; - break; - } - tl->firing = 1; - list_move_tail(&tl->entry, firing); - } - ++timers; - maxfire = 20; - sched_expires = 0; - while (!list_empty(timers)) { - struct cpu_timer_list *tl = list_first_entry(timers, - struct cpu_timer_list, - entry); - if (!--maxfire || sum_sched_runtime < tl->expires.sched) { - sched_expires = tl->expires.sched; - break; - } - tl->firing = 1; - list_move_tail(&tl->entry, firing); - } + prof_expires = check_timers_list(timers, firing, ptime); + virt_expires = check_timers_list(++timers, firing, utime); + sched_expires = check_timers_list(++timers, firing, sum_sched_runtime); /* * Check for the special case process timers. @@ -1127,8 +968,8 @@ static void check_process_timers(struct task_struct *tsk, } } - sig->cputime_expires.prof_exp = prof_expires; - sig->cputime_expires.virt_exp = virt_expires; + sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires); + sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires); sig->cputime_expires.sched_exp = sched_expires; if (task_cputime_zero(&sig->cputime_expires)) stop_process_timers(sig); @@ -1141,13 +982,7 @@ static void check_process_timers(struct task_struct *tsk, void posix_cpu_timer_schedule(struct k_itimer *timer) { struct task_struct *p = timer->it.cpu.task; - union cpu_time_count now; - - if (unlikely(p == NULL)) - /* - * The task was cleaned up already, no future firings. - */ - goto out; + unsigned long long now; /* * Fetch the current sample and update the timer's expiry time. @@ -1155,10 +990,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) if (CPUCLOCK_PERTHREAD(timer->it_clock)) { cpu_clock_sample(timer->it_clock, p, &now); bump_cpu_timer(timer, now); - if (unlikely(p->exit_state)) { - clear_dead_task(timer, now); - goto out; - } read_lock(&tasklist_lock); /* arm_timer needs it. */ spin_lock(&p->sighand->siglock); } else { @@ -1170,15 +1001,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) */ put_task_struct(p); timer->it.cpu.task = p = NULL; - timer->it.cpu.expires.sched = 0; - goto out_unlock; - } else if (unlikely(p->exit_state) && thread_group_empty(p)) { - /* - * We've noticed that the thread is dead, but - * not yet reaped. Take this opportunity to - * drop our task ref. - */ - clear_dead_task(timer, now); + timer->it.cpu.expires = 0; goto out_unlock; } spin_lock(&p->sighand->siglock); @@ -1197,7 +1020,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) out_unlock: read_unlock(&tasklist_lock); -out: timer->it_overrun_last = timer->it_overrun; timer->it_overrun = -1; ++timer->it_requeue_pending; @@ -1345,7 +1167,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, cputime_t *newval, cputime_t *oldval) { - union cpu_time_count now; + unsigned long long now; BUG_ON(clock_idx == CPUCLOCK_SCHED); cpu_timer_sample_group(clock_idx, tsk, &now); @@ -1357,17 +1179,17 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, * it to be absolute. */ if (*oldval) { - if (*oldval <= now.cpu) { + if (*oldval <= now) { /* Just about to fire. */ *oldval = cputime_one_jiffy; } else { - *oldval -= now.cpu; + *oldval -= now; } } if (!*newval) return; - *newval += now.cpu; + *newval += now; } /* @@ -1415,7 +1237,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, } while (!signal_pending(current)) { - if (timer.it.cpu.expires.sched == 0) { + if (timer.it.cpu.expires == 0) { /* * Our timer fired and was reset, below * deletion can not fail. diff --git a/kernel/printk.c b/kernel/printk.c index a0584ceb4b4c..001d005b8612 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -32,6 +32,7 @@ #include <linux/security.h> #include <linux/bootmem.h> #include <linux/memblock.h> +#include <linux/aio.h> #include <linux/syscalls.h> #include <linux/kexec.h> #include <linux/kdb.h> @@ -49,13 +50,6 @@ #define CREATE_TRACE_POINTS #include <trace/events/printk.h> -/* - * Architectures can override it: - */ -void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) -{ -} - /* printk's without a loglevel use this.. */ #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL @@ -608,7 +602,8 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait) /* return error when data has vanished underneath us */ if (user->seq < log_first_seq) ret = POLLIN|POLLRDNORM|POLLERR|POLLPRI; - ret = POLLIN|POLLRDNORM; + else + ret = POLLIN|POLLRDNORM; } raw_spin_unlock_irq(&logbuf_lock); @@ -1265,7 +1260,7 @@ static void call_console_drivers(int level, const char *text, size_t len) { struct console *con; - trace_console(text, 0, len, len); + trace_console(text, len); if (!console_drivers) return; @@ -1724,6 +1719,29 @@ static size_t cont_print_text(char *text, size_t size) { return 0; } #endif /* CONFIG_PRINTK */ +#ifdef CONFIG_EARLY_PRINTK +struct console *early_console; + +void early_vprintk(const char *fmt, va_list ap) +{ + if (early_console) { + char buf[512]; + int n = vscnprintf(buf, sizeof(buf), fmt, ap); + + early_console->write(early_console, buf, n); + } +} + +asmlinkage void early_printk(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + early_vprintk(fmt, ap); + va_end(ap); +} +#endif + static int __add_preferred_console(char *name, int idx, char *options, char *brl_options) { diff --git a/kernel/ptrace.c b/kernel/ptrace.c index acbd28424d81..aed981a3f69c 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -17,6 +17,7 @@ #include <linux/ptrace.h> #include <linux/security.h> #include <linux/signal.h> +#include <linux/uio.h> #include <linux/audit.h> #include <linux/pid_namespace.h> #include <linux/syscalls.h> @@ -24,6 +25,7 @@ #include <linux/regset.h> #include <linux/hw_breakpoint.h> #include <linux/cn_proc.h> +#include <linux/compat.h> static int ptrace_trapping_sleep_fn(void *flags) @@ -618,6 +620,81 @@ static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info) return error; } +static int ptrace_peek_siginfo(struct task_struct *child, + unsigned long addr, + unsigned long data) +{ + struct ptrace_peeksiginfo_args arg; + struct sigpending *pending; + struct sigqueue *q; + int ret, i; + + ret = copy_from_user(&arg, (void __user *) addr, + sizeof(struct ptrace_peeksiginfo_args)); + if (ret) + return -EFAULT; + + if (arg.flags & ~PTRACE_PEEKSIGINFO_SHARED) + return -EINVAL; /* unknown flags */ + + if (arg.nr < 0) + return -EINVAL; + + if (arg.flags & PTRACE_PEEKSIGINFO_SHARED) + pending = &child->signal->shared_pending; + else + pending = &child->pending; + + for (i = 0; i < arg.nr; ) { + siginfo_t info; + s32 off = arg.off + i; + + spin_lock_irq(&child->sighand->siglock); + list_for_each_entry(q, &pending->list, list) { + if (!off--) { + copy_siginfo(&info, &q->info); + break; + } + } + spin_unlock_irq(&child->sighand->siglock); + + if (off >= 0) /* beyond the end of the list */ + break; + +#ifdef CONFIG_COMPAT + if (unlikely(is_compat_task())) { + compat_siginfo_t __user *uinfo = compat_ptr(data); + + ret = copy_siginfo_to_user32(uinfo, &info); + ret |= __put_user(info.si_code, &uinfo->si_code); + } else +#endif + { + siginfo_t __user *uinfo = (siginfo_t __user *) data; + + ret = copy_siginfo_to_user(uinfo, &info); + ret |= __put_user(info.si_code, &uinfo->si_code); + } + + if (ret) { + ret = -EFAULT; + break; + } + + data += sizeof(siginfo_t); + i++; + + if (signal_pending(current)) + break; + + cond_resched(); + } + + if (i > 0) + return i; + + return ret; +} #ifdef PTRACE_SINGLESTEP #define is_singlestep(request) ((request) == PTRACE_SINGLESTEP) @@ -748,6 +825,10 @@ int ptrace_request(struct task_struct *child, long request, ret = put_user(child->ptrace_message, datalp); break; + case PTRACE_PEEKSIGINFO: + ret = ptrace_peek_siginfo(child, addr, data); + break; + case PTRACE_GETSIGINFO: ret = ptrace_getsiginfo(child, &siginfo); if (!ret) diff --git a/kernel/range.c b/kernel/range.c index 9b8ae2d6ed68..071b0ab455cb 100644 --- a/kernel/range.c +++ b/kernel/range.c @@ -97,7 +97,8 @@ void subtract_range(struct range *range, int az, u64 start, u64 end) range[i].end = range[j].end; range[i].start = end; } else { - printk(KERN_ERR "run of slot in ranges\n"); + pr_err("%s: run out of slot in ranges\n", + __func__); } range[j].end = start; continue; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 2d5f94c1c7fb..d8534308fd05 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1441,7 +1441,7 @@ static int rcu_gp_init(struct rcu_state *rsp) rnp->grphi, rnp->qsmask); raw_spin_unlock_irq(&rnp->lock); #ifdef CONFIG_PROVE_RCU_DELAY - if ((random32() % (rcu_num_nodes * 8)) == 0 && + if ((prandom_u32() % (rcu_num_nodes * 8)) == 0 && system_state == SYSTEM_RUNNING) schedule_timeout_uninterruptible(2); #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ diff --git a/kernel/relay.c b/kernel/relay.c index 01ab081ac53a..eef0d113b79e 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -588,7 +588,7 @@ struct rchan *relay_open(const char *base_filename, chan->version = RELAYFS_CHANNEL_VERSION; chan->n_subbufs = n_subbufs; chan->subbuf_size = subbuf_size; - chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs); + chan->alloc_size = PAGE_ALIGN(subbuf_size * n_subbufs); chan->parent = parent; chan->private_data = private_data; if (base_filename) { @@ -1099,8 +1099,7 @@ static size_t relay_file_read_end_pos(struct rchan_buf *buf, static int subbuf_read_actor(size_t read_start, struct rchan_buf *buf, size_t avail, - read_descriptor_t *desc, - read_actor_t actor) + read_descriptor_t *desc) { void *from; int ret = 0; @@ -1121,15 +1120,13 @@ static int subbuf_read_actor(size_t read_start, typedef int (*subbuf_actor_t) (size_t read_start, struct rchan_buf *buf, size_t avail, - read_descriptor_t *desc, - read_actor_t actor); + read_descriptor_t *desc); /* * relay_file_read_subbufs - read count bytes, bridging subbuf boundaries */ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, subbuf_actor_t subbuf_actor, - read_actor_t actor, read_descriptor_t *desc) { struct rchan_buf *buf = filp->private_data; @@ -1150,7 +1147,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, break; avail = min(desc->count, avail); - ret = subbuf_actor(read_start, buf, avail, desc, actor); + ret = subbuf_actor(read_start, buf, avail, desc); if (desc->error < 0) break; @@ -1174,8 +1171,7 @@ static ssize_t relay_file_read(struct file *filp, desc.count = count; desc.arg.buf = buffer; desc.error = 0; - return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, - NULL, &desc); + return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, &desc); } static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed) diff --git a/kernel/semaphore.c b/kernel/semaphore.c index 4567fc020fe3..6815171a4fff 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c @@ -193,7 +193,7 @@ EXPORT_SYMBOL(up); struct semaphore_waiter { struct list_head list; struct task_struct *task; - int up; + bool up; }; /* @@ -209,12 +209,12 @@ static inline int __sched __down_common(struct semaphore *sem, long state, list_add_tail(&waiter.list, &sem->wait_list); waiter.task = task; - waiter.up = 0; + waiter.up = false; for (;;) { if (signal_pending_state(state, task)) goto interrupted; - if (timeout <= 0) + if (unlikely(timeout <= 0)) goto timed_out; __set_task_state(task, state); raw_spin_unlock_irq(&sem->lock); @@ -258,6 +258,6 @@ static noinline void __sched __up(struct semaphore *sem) struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, struct semaphore_waiter, list); list_del(&waiter->list); - waiter->up = 1; + waiter->up = true; wake_up_process(waiter->task); } diff --git a/kernel/signal.c b/kernel/signal.c index 497330ec2ae9..bb7ca79a97b2 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -855,12 +855,14 @@ static void ptrace_trap_notify(struct task_struct *t) * Returns true if the signal should be actually delivered, otherwise * it should be dropped. */ -static int prepare_signal(int sig, struct task_struct *p, bool force) +static bool prepare_signal(int sig, struct task_struct *p, bool force) { struct signal_struct *signal = p->signal; struct task_struct *t; - if (unlikely(signal->flags & SIGNAL_GROUP_EXIT)) { + if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) { + if (signal->flags & SIGNAL_GROUP_COREDUMP) + return sig == SIGKILL; /* * The process is in the middle of dying, nothing to do. */ diff --git a/kernel/smp.c b/kernel/smp.c index 8e451f3ff51b..31670c8d8f89 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -12,6 +12,7 @@ #include <linux/gfp.h> #include <linux/smp.h> #include <linux/cpu.h> +#include <linux/hardirq.h> #include "smpboot.h" @@ -100,16 +101,16 @@ void __init call_function_init(void) * previous function call. For multi-cpu calls its even more interesting * as we'll have to ensure no other cpu is observing our csd. */ -static void csd_lock_wait(struct call_single_data *data) +static void csd_lock_wait(struct call_single_data *csd) { - while (data->flags & CSD_FLAG_LOCK) + while (csd->flags & CSD_FLAG_LOCK) cpu_relax(); } -static void csd_lock(struct call_single_data *data) +static void csd_lock(struct call_single_data *csd) { - csd_lock_wait(data); - data->flags = CSD_FLAG_LOCK; + csd_lock_wait(csd); + csd->flags = CSD_FLAG_LOCK; /* * prevent CPU from reordering the above assignment @@ -119,16 +120,16 @@ static void csd_lock(struct call_single_data *data) smp_mb(); } -static void csd_unlock(struct call_single_data *data) +static void csd_unlock(struct call_single_data *csd) { - WARN_ON(!(data->flags & CSD_FLAG_LOCK)); + WARN_ON(!(csd->flags & CSD_FLAG_LOCK)); /* * ensure we're all done before releasing data: */ smp_mb(); - data->flags &= ~CSD_FLAG_LOCK; + csd->flags &= ~CSD_FLAG_LOCK; } /* @@ -137,7 +138,7 @@ static void csd_unlock(struct call_single_data *data) * ->func, ->info, and ->flags set. */ static -void generic_exec_single(int cpu, struct call_single_data *data, int wait) +void generic_exec_single(int cpu, struct call_single_data *csd, int wait) { struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); unsigned long flags; @@ -145,7 +146,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait) raw_spin_lock_irqsave(&dst->lock, flags); ipi = list_empty(&dst->list); - list_add_tail(&data->list, &dst->list); + list_add_tail(&csd->list, &dst->list); raw_spin_unlock_irqrestore(&dst->lock, flags); /* @@ -163,7 +164,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait) arch_send_call_function_single_ipi(cpu); if (wait) - csd_lock_wait(data); + csd_lock_wait(csd); } /* @@ -173,7 +174,6 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait) void generic_smp_call_function_single_interrupt(void) { struct call_single_queue *q = &__get_cpu_var(call_single_queue); - unsigned int data_flags; LIST_HEAD(list); /* @@ -186,25 +186,26 @@ void generic_smp_call_function_single_interrupt(void) raw_spin_unlock(&q->lock); while (!list_empty(&list)) { - struct call_single_data *data; + struct call_single_data *csd; + unsigned int csd_flags; - data = list_entry(list.next, struct call_single_data, list); - list_del(&data->list); + csd = list_entry(list.next, struct call_single_data, list); + list_del(&csd->list); /* - * 'data' can be invalid after this call if flags == 0 + * 'csd' can be invalid after this call if flags == 0 * (when called through generic_exec_single()), * so save them away before making the call: */ - data_flags = data->flags; + csd_flags = csd->flags; - data->func(data->info); + csd->func(csd->info); /* * Unlocked CSDs are valid through generic_exec_single(): */ - if (data_flags & CSD_FLAG_LOCK) - csd_unlock(data); + if (csd_flags & CSD_FLAG_LOCK) + csd_unlock(csd); } } @@ -240,8 +241,9 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, * send smp call function interrupt to this cpu and as such deadlocks * can't happen. */ - WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() - && !oops_in_progress); + WARN_ON_ONCE(cpu_online(this_cpu) + && (irqs_disabled() || in_serving_irq()) + && !oops_in_progress); if (cpu == this_cpu) { local_irq_save(flags); @@ -249,16 +251,16 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, local_irq_restore(flags); } else { if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { - struct call_single_data *data = &d; + struct call_single_data *csd = &d; if (!wait) - data = &__get_cpu_var(csd_data); + csd = &__get_cpu_var(csd_data); - csd_lock(data); + csd_lock(csd); - data->func = func; - data->info = info; - generic_exec_single(cpu, data, wait); + csd->func = func; + csd->info = info; + generic_exec_single(cpu, csd, wait); } else { err = -ENXIO; /* CPU not online */ } @@ -325,7 +327,7 @@ EXPORT_SYMBOL_GPL(smp_call_function_any); * pre-allocated data structure. Useful for embedding @data inside * other structures, for instance. */ -void __smp_call_function_single(int cpu, struct call_single_data *data, +void __smp_call_function_single(int cpu, struct call_single_data *csd, int wait) { unsigned int this_cpu; @@ -343,11 +345,11 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, if (cpu == this_cpu) { local_irq_save(flags); - data->func(data->info); + csd->func(csd->info); local_irq_restore(flags); } else { - csd_lock(data); - generic_exec_single(cpu, data, wait); + csd_lock(csd); + generic_exec_single(cpu, csd, wait); } put_cpu(); } @@ -369,7 +371,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, void smp_call_function_many(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait) { - struct call_function_data *data; + struct call_function_data *cfd; int cpu, next_cpu, this_cpu = smp_processor_id(); /* @@ -378,8 +380,9 @@ void smp_call_function_many(const struct cpumask *mask, * send smp call function interrupt to this cpu and as such deadlocks * can't happen. */ - WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() - && !oops_in_progress && !early_boot_irqs_disabled); + WARN_ON_ONCE(cpu_online(this_cpu) + && (irqs_disabled() || in_serving_irq()) + && !oops_in_progress && !early_boot_irqs_disabled); /* Try to fastpath. So, what's a CPU they want? Ignoring this one. */ cpu = cpumask_first_and(mask, cpu_online_mask); @@ -401,24 +404,24 @@ void smp_call_function_many(const struct cpumask *mask, return; } - data = &__get_cpu_var(cfd_data); + cfd = &__get_cpu_var(cfd_data); - cpumask_and(data->cpumask, mask, cpu_online_mask); - cpumask_clear_cpu(this_cpu, data->cpumask); + cpumask_and(cfd->cpumask, mask, cpu_online_mask); + cpumask_clear_cpu(this_cpu, cfd->cpumask); /* Some callers race with other cpus changing the passed mask */ - if (unlikely(!cpumask_weight(data->cpumask))) + if (unlikely(!cpumask_weight(cfd->cpumask))) return; /* - * After we put an entry into the list, data->cpumask - * may be cleared again when another CPU sends another IPI for - * a SMP function call, so data->cpumask will be zero. + * After we put an entry into the list, cfd->cpumask may be cleared + * again when another CPU sends another IPI for a SMP function call, so + * cfd->cpumask will be zero. */ - cpumask_copy(data->cpumask_ipi, data->cpumask); + cpumask_copy(cfd->cpumask_ipi, cfd->cpumask); - for_each_cpu(cpu, data->cpumask) { - struct call_single_data *csd = per_cpu_ptr(data->csd, cpu); + for_each_cpu(cpu, cfd->cpumask) { + struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu); struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); unsigned long flags; @@ -433,12 +436,13 @@ void smp_call_function_many(const struct cpumask *mask, } /* Send a message to all CPUs in the map */ - arch_send_call_function_ipi_mask(data->cpumask_ipi); + arch_send_call_function_ipi_mask(cfd->cpumask_ipi); if (wait) { - for_each_cpu(cpu, data->cpumask) { - struct call_single_data *csd = - per_cpu_ptr(data->csd, cpu); + for_each_cpu(cpu, cfd->cpumask) { + struct call_single_data *csd; + + csd = per_cpu_ptr(cfd->csd, cpu); csd_lock_wait(csd); } } diff --git a/kernel/sys.c b/kernel/sys.c index 4b6fff5e7400..b95d3c72ba21 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -49,6 +49,11 @@ #include <linux/user_namespace.h> #include <linux/binfmts.h> +#include <linux/sched.h> +#include <linux/rcupdate.h> +#include <linux/uidgid.h> +#include <linux/cred.h> + #include <linux/kmsg_dump.h> /* Move somewhere else to avoid recompiling? */ #include <generated/utsrelease.h> @@ -1044,6 +1049,67 @@ change_okay: return old_fsgid; } +/** + * sys_getpid - return the thread group id of the current process + * + * Note, despite the name, this returns the tgid not the pid. The tgid and + * the pid are identical unless CLONE_THREAD was specified on clone() in + * which case the tgid is the same in all threads of the same group. + * + * This is SMP safe as current->tgid does not change. + */ +SYSCALL_DEFINE0(getpid) +{ + return task_tgid_vnr(current); +} + +/* Thread ID - the internal kernel "pid" */ +SYSCALL_DEFINE0(gettid) +{ + return task_pid_vnr(current); +} + +/* + * Accessing ->real_parent is not SMP-safe, it could + * change from under us. However, we can use a stale + * value of ->real_parent under rcu_read_lock(), see + * release_task()->call_rcu(delayed_put_task_struct). + */ +SYSCALL_DEFINE0(getppid) +{ + int pid; + + rcu_read_lock(); + pid = task_tgid_vnr(rcu_dereference(current->real_parent)); + rcu_read_unlock(); + + return pid; +} + +SYSCALL_DEFINE0(getuid) +{ + /* Only we change this so SMP safe */ + return from_kuid_munged(current_user_ns(), current_uid()); +} + +SYSCALL_DEFINE0(geteuid) +{ + /* Only we change this so SMP safe */ + return from_kuid_munged(current_user_ns(), current_euid()); +} + +SYSCALL_DEFINE0(getgid) +{ + /* Only we change this so SMP safe */ + return from_kgid_munged(current_user_ns(), current_gid()); +} + +SYSCALL_DEFINE0(getegid) +{ + /* Only we change this so SMP safe */ + return from_kgid_munged(current_user_ns(), current_egid()); +} + void do_sys_times(struct tms *tms) { cputime_t tgutime, tgstime, cutime, cstime; @@ -1805,7 +1871,6 @@ SYSCALL_DEFINE1(umask, int, mask) return mask; } -#ifdef CONFIG_CHECKPOINT_RESTORE static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) { struct fd exe; @@ -1999,17 +2064,12 @@ out: return error; } +#ifdef CONFIG_CHECKPOINT_RESTORE static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) { return put_user(me->clear_child_tid, tid_addr); } - -#else /* CONFIG_CHECKPOINT_RESTORE */ -static int prctl_set_mm(int opt, unsigned long addr, - unsigned long arg4, unsigned long arg5) -{ - return -EINVAL; -} +#else static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) { return -EINVAL; @@ -2259,3 +2319,148 @@ int orderly_poweroff(bool force) return 0; } EXPORT_SYMBOL_GPL(orderly_poweroff); + +/** + * do_sysinfo - fill in sysinfo struct + * @info: pointer to buffer to fill + */ +static int do_sysinfo(struct sysinfo *info) +{ + unsigned long mem_total, sav_total; + unsigned int mem_unit, bitcount; + struct timespec tp; + + memset(info, 0, sizeof(struct sysinfo)); + + ktime_get_ts(&tp); + monotonic_to_bootbased(&tp); + info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); + + get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); + + info->procs = nr_threads; + + si_meminfo(info); + si_swapinfo(info); + + /* + * If the sum of all the available memory (i.e. ram + swap) + * is less than can be stored in a 32 bit unsigned long then + * we can be binary compatible with 2.2.x kernels. If not, + * well, in that case 2.2.x was broken anyways... + * + * -Erik Andersen <andersee@debian.org> + */ + + mem_total = info->totalram + info->totalswap; + if (mem_total < info->totalram || mem_total < info->totalswap) + goto out; + bitcount = 0; + mem_unit = info->mem_unit; + while (mem_unit > 1) { + bitcount++; + mem_unit >>= 1; + sav_total = mem_total; + mem_total <<= 1; + if (mem_total < sav_total) + goto out; + } + + /* + * If mem_total did not overflow, multiply all memory values by + * info->mem_unit and set it to 1. This leaves things compatible + * with 2.2.x, and also retains compatibility with earlier 2.4.x + * kernels... + */ + + info->mem_unit = 1; + info->totalram <<= bitcount; + info->freeram <<= bitcount; + info->sharedram <<= bitcount; + info->bufferram <<= bitcount; + info->totalswap <<= bitcount; + info->freeswap <<= bitcount; + info->totalhigh <<= bitcount; + info->freehigh <<= bitcount; + +out: + return 0; +} + +SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) +{ + struct sysinfo val; + + do_sysinfo(&val); + + if (copy_to_user(info, &val, sizeof(struct sysinfo))) + return -EFAULT; + + return 0; +} + +#ifdef CONFIG_COMPAT +struct compat_sysinfo { + s32 uptime; + u32 loads[3]; + u32 totalram; + u32 freeram; + u32 sharedram; + u32 bufferram; + u32 totalswap; + u32 freeswap; + u16 procs; + u16 pad; + u32 totalhigh; + u32 freehigh; + u32 mem_unit; + char _f[20-2*sizeof(u32)-sizeof(int)]; +}; + +COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info) +{ + struct sysinfo s; + + do_sysinfo(&s); + + /* Check to see if any memory value is too large for 32-bit and scale + * down if needed + */ + if ((s.totalram >> 32) || (s.totalswap >> 32)) { + int bitcount = 0; + + while (s.mem_unit < PAGE_SIZE) { + s.mem_unit <<= 1; + bitcount++; + } + + s.totalram >>= bitcount; + s.freeram >>= bitcount; + s.sharedram >>= bitcount; + s.bufferram >>= bitcount; + s.totalswap >>= bitcount; + s.freeswap >>= bitcount; + s.totalhigh >>= bitcount; + s.freehigh >>= bitcount; + } + + if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) || + __put_user(s.uptime, &info->uptime) || + __put_user(s.loads[0], &info->loads[0]) || + __put_user(s.loads[1], &info->loads[1]) || + __put_user(s.loads[2], &info->loads[2]) || + __put_user(s.totalram, &info->totalram) || + __put_user(s.freeram, &info->freeram) || + __put_user(s.sharedram, &info->sharedram) || + __put_user(s.bufferram, &info->bufferram) || + __put_user(s.totalswap, &info->totalswap) || + __put_user(s.freeswap, &info->freeswap) || + __put_user(s.procs, &info->procs) || + __put_user(s.totalhigh, &info->totalhigh) || + __put_user(s.freehigh, &info->freehigh) || + __put_user(s.mem_unit, &info->mem_unit)) + return -EFAULT; + + return 0; +} +#endif /* CONFIG_COMPAT */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index afc1dc60f3f8..9edcf456e0fc 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -106,7 +106,6 @@ extern unsigned int core_pipe_limit; #endif extern int pid_max; extern int pid_max_min, pid_max_max; -extern int sysctl_drop_caches; extern int percpu_pagelist_fraction; extern int compat_log; extern int latencytop_enabled; @@ -1430,6 +1429,20 @@ static struct ctl_table vm_table[] = { .extra2 = &one, }, #endif + { + .procname = "user_reserve_kbytes", + .data = &sysctl_user_reserve_kbytes, + .maxlen = sizeof(sysctl_user_reserve_kbytes), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, + { + .procname = "admin_reserve_kbytes", + .data = &sysctl_admin_reserve_kbytes, + .maxlen = sizeof(sysctl_admin_reserve_kbytes), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, { } }; diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c index f8b11a283171..12d6ebbfdd83 100644 --- a/kernel/test_kprobes.c +++ b/kernel/test_kprobes.c @@ -365,7 +365,7 @@ int init_test_probes(void) target2 = kprobe_target2; do { - rand1 = random32(); + rand1 = prandom_u32(); } while (rand1 <= div_factor); printk(KERN_INFO "Kprobe smoke test started\n"); diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index af5a7e9f164b..3bdf28323012 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -20,6 +20,13 @@ #include <asm/uaccess.h> + +struct timer_list_iter { + int cpu; + bool second_pass; + u64 now; +}; + typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes); DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); @@ -133,7 +140,6 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); int i; - SEQ_printf(m, "\n"); SEQ_printf(m, "cpu: %d\n", cpu); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { SEQ_printf(m, " clock %d:\n", i); @@ -187,6 +193,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) #undef P #undef P_ns + SEQ_printf(m, "\n"); } #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -195,7 +202,6 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) { struct clock_event_device *dev = td->evtdev; - SEQ_printf(m, "\n"); SEQ_printf(m, "Tick Device: mode: %d\n", td->mode); if (cpu < 0) SEQ_printf(m, "Broadcast device\n"); @@ -230,12 +236,11 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) print_name_offset(m, dev->event_handler); SEQ_printf(m, "\n"); SEQ_printf(m, " retries: %lu\n", dev->retries); + SEQ_printf(m, "\n"); } -static void timer_list_show_tickdevices(struct seq_file *m) +static void timer_list_show_tickdevices_header(struct seq_file *m) { - int cpu; - #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST print_tickdevice(m, tick_get_broadcast_device(), -1); SEQ_printf(m, "tick_broadcast_mask: %08lx\n", @@ -246,47 +251,104 @@ static void timer_list_show_tickdevices(struct seq_file *m) #endif SEQ_printf(m, "\n"); #endif - for_each_online_cpu(cpu) - print_tickdevice(m, tick_get_device(cpu), cpu); - SEQ_printf(m, "\n"); } -#else -static void timer_list_show_tickdevices(struct seq_file *m) { } #endif +static inline void timer_list_header(struct seq_file *m, u64 now) +{ + SEQ_printf(m, "Timer List Version: v0.7\n"); + SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); + SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); + SEQ_printf(m, "\n"); +} + static int timer_list_show(struct seq_file *m, void *v) { + struct timer_list_iter *iter = v; + u64 now = ktime_to_ns(ktime_get()); + + if (iter->cpu == -1 && !iter->second_pass) + timer_list_header(m, now); + else if (!iter->second_pass) + print_cpu(m, iter->cpu, iter->now); +#ifdef CONFIG_GENERIC_CLOCKEVENTS + else if (iter->cpu == -1 && iter->second_pass) + timer_list_show_tickdevices_header(m); + else + print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu); +#endif + return 0; +} + +void sysrq_timer_list_show(void) +{ u64 now = ktime_to_ns(ktime_get()); int cpu; - SEQ_printf(m, "Timer List Version: v0.7\n"); - SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); - SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); + timer_list_header(NULL, now); for_each_online_cpu(cpu) - print_cpu(m, cpu, now); + print_cpu(NULL, cpu, now); - SEQ_printf(m, "\n"); - timer_list_show_tickdevices(m); +#ifdef CONFIG_GENERIC_CLOCKEVENTS + timer_list_show_tickdevices_header(NULL); + for_each_online_cpu(cpu) + print_tickdevice(NULL, tick_get_device(cpu), cpu); +#endif + return; +} - return 0; +static void *timer_list_start(struct seq_file *file, loff_t *offset) +{ + struct timer_list_iter *iter = file->private; + + if (!*offset) { + iter->cpu = -1; + iter->now = ktime_to_ns(ktime_get()); + } else if (iter->cpu >= nr_cpu_ids) { +#ifdef CONFIG_GENERIC_CLOCKEVENTS + if (!iter->second_pass) { + iter->cpu = -1; + iter->second_pass = true; + } else + return NULL; +#else + return NULL; +#endif + } + return iter; } -void sysrq_timer_list_show(void) +static void *timer_list_next(struct seq_file *file, void *v, loff_t *offset) +{ + struct timer_list_iter *iter = file->private; + iter->cpu = cpumask_next(iter->cpu, cpu_online_mask); + ++*offset; + return timer_list_start(file, offset); +} + +static void timer_list_stop(struct seq_file *seq, void *v) { - timer_list_show(NULL, NULL); } +static const struct seq_operations timer_list_sops = { + .start = timer_list_start, + .next = timer_list_next, + .stop = timer_list_stop, + .show = timer_list_show, +}; + static int timer_list_open(struct inode *inode, struct file *filp) { - return single_open(filp, timer_list_show, NULL); + return seq_open_private(filp, &timer_list_sops, + sizeof(struct timer_list_iter)); } static const struct file_operations timer_list_fops = { .open = timer_list_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = seq_release_private, }; static int __init init_timer_list_procfs(void) diff --git a/kernel/timer.c b/kernel/timer.c index 1b7489fdea41..a860bba34412 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1,7 +1,7 @@ /* * linux/kernel/timer.c * - * Kernel internal timers, basic process system calls + * Kernel internal timers * * Copyright (C) 1991, 1992 Linus Torvalds * @@ -41,6 +41,7 @@ #include <linux/sched.h> #include <linux/sched/sysctl.h> #include <linux/slab.h> +#include <linux/compat.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -1395,61 +1396,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds) #endif -/** - * sys_getpid - return the thread group id of the current process - * - * Note, despite the name, this returns the tgid not the pid. The tgid and - * the pid are identical unless CLONE_THREAD was specified on clone() in - * which case the tgid is the same in all threads of the same group. - * - * This is SMP safe as current->tgid does not change. - */ -SYSCALL_DEFINE0(getpid) -{ - return task_tgid_vnr(current); -} - -/* - * Accessing ->real_parent is not SMP-safe, it could - * change from under us. However, we can use a stale - * value of ->real_parent under rcu_read_lock(), see - * release_task()->call_rcu(delayed_put_task_struct). - */ -SYSCALL_DEFINE0(getppid) -{ - int pid; - - rcu_read_lock(); - pid = task_tgid_vnr(rcu_dereference(current->real_parent)); - rcu_read_unlock(); - - return pid; -} - -SYSCALL_DEFINE0(getuid) -{ - /* Only we change this so SMP safe */ - return from_kuid_munged(current_user_ns(), current_uid()); -} - -SYSCALL_DEFINE0(geteuid) -{ - /* Only we change this so SMP safe */ - return from_kuid_munged(current_user_ns(), current_euid()); -} - -SYSCALL_DEFINE0(getgid) -{ - /* Only we change this so SMP safe */ - return from_kgid_munged(current_user_ns(), current_gid()); -} - -SYSCALL_DEFINE0(getegid) -{ - /* Only we change this so SMP safe */ - return from_kgid_munged(current_user_ns(), current_egid()); -} - static void process_timeout(unsigned long __data) { wake_up_process((struct task_struct *)__data); @@ -1557,91 +1503,6 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) } EXPORT_SYMBOL(schedule_timeout_uninterruptible); -/* Thread ID - the internal kernel "pid" */ -SYSCALL_DEFINE0(gettid) -{ - return task_pid_vnr(current); -} - -/** - * do_sysinfo - fill in sysinfo struct - * @info: pointer to buffer to fill - */ -int do_sysinfo(struct sysinfo *info) -{ - unsigned long mem_total, sav_total; - unsigned int mem_unit, bitcount; - struct timespec tp; - - memset(info, 0, sizeof(struct sysinfo)); - - ktime_get_ts(&tp); - monotonic_to_bootbased(&tp); - info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); - - get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); - - info->procs = nr_threads; - - si_meminfo(info); - si_swapinfo(info); - - /* - * If the sum of all the available memory (i.e. ram + swap) - * is less than can be stored in a 32 bit unsigned long then - * we can be binary compatible with 2.2.x kernels. If not, - * well, in that case 2.2.x was broken anyways... - * - * -Erik Andersen <andersee@debian.org> - */ - - mem_total = info->totalram + info->totalswap; - if (mem_total < info->totalram || mem_total < info->totalswap) - goto out; - bitcount = 0; - mem_unit = info->mem_unit; - while (mem_unit > 1) { - bitcount++; - mem_unit >>= 1; - sav_total = mem_total; - mem_total <<= 1; - if (mem_total < sav_total) - goto out; - } - - /* - * If mem_total did not overflow, multiply all memory values by - * info->mem_unit and set it to 1. This leaves things compatible - * with 2.2.x, and also retains compatibility with earlier 2.4.x - * kernels... - */ - - info->mem_unit = 1; - info->totalram <<= bitcount; - info->freeram <<= bitcount; - info->sharedram <<= bitcount; - info->bufferram <<= bitcount; - info->totalswap <<= bitcount; - info->freeswap <<= bitcount; - info->totalhigh <<= bitcount; - info->freehigh <<= bitcount; - -out: - return 0; -} - -SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) -{ - struct sysinfo val; - - do_sysinfo(&val); - - if (copy_to_user(info, &val, sizeof(struct sysinfo))) - return -EFAULT; - - return 0; -} - static int __cpuinit init_timers_cpu(int cpu) { int j; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 05039e348f07..ea741c32d596 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -239,10 +239,12 @@ static void watchdog_overflow_callback(struct perf_event *event, if (__this_cpu_read(hard_watchdog_warn) == true) return; - if (hardlockup_panic) + if (hardlockup_panic) { + trigger_all_cpu_backtrace(); panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu); - else + } else { WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu); + } __this_cpu_write(hard_watchdog_warn, true); return; @@ -323,8 +325,10 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) else dump_stack(); - if (softlockup_panic) + if (softlockup_panic) { + trigger_all_cpu_backtrace(); panic("softlockup: hung tasks"); + } __this_cpu_write(soft_watchdog_warn, true); } else __this_cpu_write(soft_watchdog_warn, false); |