diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/verifier.c | 12 | ||||
-rw-r--r-- | kernel/fork.c | 141 | ||||
-rw-r--r-- | kernel/gcov/base.c | 5 | ||||
-rw-r--r-- | kernel/locking/lockdep.c | 16 | ||||
-rw-r--r-- | kernel/pid.c | 15 | ||||
-rw-r--r-- | kernel/printk/printk.c | 53 | ||||
-rw-r--r-- | kernel/ptrace.c | 39 | ||||
-rw-r--r-- | kernel/signal.c | 14 | ||||
-rw-r--r-- | kernel/smp.c | 80 | ||||
-rw-r--r-- | kernel/sys.c | 47 | ||||
-rw-r--r-- | kernel/sysctl.c | 16 | ||||
-rw-r--r-- | kernel/time/clockevents.c | 2 |
12 files changed, 284 insertions, 156 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 630a7bac1e51..47dcd3aa6e23 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1397,7 +1397,8 @@ peek_stack: /* tell verifier to check for equivalent states * after every call and jump */ - env->explored_states[t + 1] = STATE_LIST_MARK; + if (t + 1 < insn_cnt) + env->explored_states[t + 1] = STATE_LIST_MARK; } else { /* conditional jump with two edges */ ret = push_insn(t, t + 1, FALLTHROUGH, env); @@ -1636,6 +1637,8 @@ static int do_check(struct verifier_env *env) if (err) return err; + src_reg_type = regs[insn->src_reg].type; + /* check that memory (src_reg + off) is readable, * the state of dst_reg will be updated by this func */ @@ -1645,9 +1648,12 @@ static int do_check(struct verifier_env *env) if (err) return err; - src_reg_type = regs[insn->src_reg].type; + if (BPF_SIZE(insn->code) != BPF_W) { + insn_idx++; + continue; + } - if (insn->imm == 0 && BPF_SIZE(insn->code) == BPF_W) { + if (insn->imm == 0) { /* saw a valid insn * dst_reg = *(u32 *)(src_reg + off) * use reserved 'imm' field to mark this insn diff --git a/kernel/fork.c b/kernel/fork.c index f2c1e7352298..03c1eaaa6ef5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -74,6 +74,7 @@ #include <linux/uprobes.h> #include <linux/aio.h> #include <linux/compiler.h> +#include <linux/sysctl.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -88,6 +89,16 @@ #include <trace/events/task.h> /* + * Minimum number of threads to boot the kernel + */ +#define MIN_THREADS 20 + +/* + * Maximum number of threads + */ +#define MAX_THREADS FUTEX_TID_MASK + +/* * Protected counters by write_lock_irq(&tasklist_lock) */ unsigned long total_forks; /* Handle normal Linux uptimes. */ @@ -253,7 +264,30 @@ EXPORT_SYMBOL_GPL(__put_task_struct); void __init __weak arch_task_cache_init(void) { } -void __init fork_init(unsigned long mempages) +/* + * set_max_threads + */ +static void set_max_threads(unsigned int max_threads_suggested) +{ + u64 threads; + + /* + * The number of threads shall be limited such that the thread + * structures may only consume a small part of the available memory. + */ + if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64) + threads = MAX_THREADS; + else + threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE, + (u64) THREAD_SIZE * 8UL); + + if (threads > max_threads_suggested) + threads = max_threads_suggested; + + max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS); +} + +void __init fork_init(void) { #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR #ifndef ARCH_MIN_TASKALIGN @@ -268,18 +302,7 @@ void __init fork_init(unsigned long mempages) /* do the arch specific task caches init */ arch_task_cache_init(); - /* - * The default maximum number of threads is set to a safe - * value: the thread structures can take up at most half - * of memory. - */ - max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE); - - /* - * we need to allow at least 20 threads to boot a system - */ - if (max_threads < 20) - max_threads = 20; + set_max_threads(MAX_THREADS); init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; @@ -380,6 +403,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) */ down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); + /* No ordering required: file already has been exposed. */ + RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); + mm->total_vm = oldmm->total_vm; mm->shared_vm = oldmm->shared_vm; mm->exec_vm = oldmm->exec_vm; @@ -505,7 +531,13 @@ static inline void mm_free_pgd(struct mm_struct *mm) pgd_free(mm, mm->pgd); } #else -#define dup_mmap(mm, oldmm) (0) +static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) +{ + down_write(&oldmm->mmap_sem); + RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); + up_write(&oldmm->mmap_sem); + return 0; +} #define mm_alloc_pgd(mm) (0) #define mm_free_pgd(mm) #endif /* CONFIG_MMU */ @@ -674,34 +706,53 @@ void mmput(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(mmput); +/** + * set_mm_exe_file - change a reference to the mm's executable file + * + * This changes mm's executable file (shown as symlink /proc/[pid]/exe). + * + * Main users are mmput() and sys_execve(). Callers prevent concurrent + * invocations: in mmput() nobody alive left, in execve task is single + * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the + * mm->exe_file, but does so without using set_mm_exe_file() in order + * to do avoid the need for any locks. + */ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) { + struct file *old_exe_file; + + /* + * It is safe to dereference the exe_file without RCU as + * this function is only called if nobody else can access + * this mm -- see comment above for justification. + */ + old_exe_file = rcu_dereference_raw(mm->exe_file); + if (new_exe_file) get_file(new_exe_file); - if (mm->exe_file) - fput(mm->exe_file); - mm->exe_file = new_exe_file; + rcu_assign_pointer(mm->exe_file, new_exe_file); + if (old_exe_file) + fput(old_exe_file); } +/** + * get_mm_exe_file - acquire a reference to the mm's executable file + * + * Returns %NULL if mm has no associated executable file. + * User must release file via fput(). + */ struct file *get_mm_exe_file(struct mm_struct *mm) { struct file *exe_file; - /* We need mmap_sem to protect against races with removal of exe_file */ - down_read(&mm->mmap_sem); - exe_file = mm->exe_file; - if (exe_file) - get_file(exe_file); - up_read(&mm->mmap_sem); + rcu_read_lock(); + exe_file = rcu_dereference(mm->exe_file); + if (exe_file && !get_file_rcu(exe_file)) + exe_file = NULL; + rcu_read_unlock(); return exe_file; } - -static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) -{ - /* It's safe to write the exe_file pointer without exe_file_lock because - * this is called during fork when the task is not yet in /proc */ - newmm->exe_file = get_mm_exe_file(oldmm); -} +EXPORT_SYMBOL(get_mm_exe_file); /** * get_task_mm - acquire a reference to the task's mm @@ -864,8 +915,6 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) if (!mm_init(mm, tsk)) goto fail_nomem; - dup_mm_exe_file(oldmm, mm); - err = dup_mmap(mm, oldmm); if (err) goto free_pt; @@ -1403,10 +1452,11 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { - retval = -ENOMEM; pid = alloc_pid(p->nsproxy->pid_ns_for_children); - if (!pid) + if (IS_ERR(pid)) { + retval = PTR_ERR(pid); goto bad_fork_cleanup_io; + } } p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; @@ -2000,3 +2050,26 @@ int unshare_files(struct files_struct **displaced) task_unlock(task); return 0; } + +int sysctl_max_threads(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table t; + int ret; + int threads = max_threads; + int min = MIN_THREADS; + int max = MAX_THREADS; + + t = *table; + t.data = &threads; + t.extra1 = &min; + t.extra2 = &max; + + ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + + set_max_threads(threads); + + return 0; +} diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c index b358a802fd18..a744098e4eb7 100644 --- a/kernel/gcov/base.c +++ b/kernel/gcov/base.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mutex.h> +#include <linux/sched.h> #include "gcov.h" static int gcov_events_enabled; @@ -107,8 +108,10 @@ void gcov_enable_events(void) gcov_events_enabled = 1; /* Perform event callback for previously registered entries. */ - while ((info = gcov_info_next(info))) + while ((info = gcov_info_next(info))) { gcov_event(GCOV_ADD, info); + cond_resched(); + } mutex_unlock(&gcov_lock); } diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index ba77ab5f64dd..a0831e1b99f4 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -551,7 +551,21 @@ static void print_lockdep_cache(struct lockdep_map *lock) static void print_lock(struct held_lock *hlock) { - print_lock_name(hlock_class(hlock)); + /* + * We can be called locklessly through debug_show_all_locks() so be + * extra careful, the hlock might have been released and cleared. + */ + unsigned int class_idx = hlock->class_idx; + + /* Don't re-read hlock->class_idx, can't use READ_ONCE() on bitfields: */ + barrier(); + + if (!class_idx || (class_idx - 1) >= MAX_LOCKDEP_KEYS) { + printk("<RELEASED>\n"); + return; + } + + print_lock_name(lock_classes + class_idx - 1); printk(", at: "); print_ip_sym(hlock->acquire_ip); } diff --git a/kernel/pid.c b/kernel/pid.c index cd36a5e0d173..4fd07d5b7baf 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -182,7 +182,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) spin_unlock_irq(&pidmap_lock); kfree(page); if (unlikely(!map->page)) - break; + return -ENOMEM; } if (likely(atomic_read(&map->nr_free))) { for ( ; ; ) { @@ -210,7 +210,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) } pid = mk_pid(pid_ns, map, offset); } - return -1; + return -EAGAIN; } int next_pidmap(struct pid_namespace *pid_ns, unsigned int last) @@ -301,17 +301,20 @@ struct pid *alloc_pid(struct pid_namespace *ns) int i, nr; struct pid_namespace *tmp; struct upid *upid; + int retval = -ENOMEM; pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL); if (!pid) - goto out; + return ERR_PTR(retval); tmp = ns; pid->level = ns->level; for (i = ns->level; i >= 0; i--) { nr = alloc_pidmap(tmp); - if (nr < 0) + if (IS_ERR_VALUE(nr)) { + retval = nr; goto out_free; + } pid->numbers[i].nr = nr; pid->numbers[i].ns = tmp; @@ -339,7 +342,6 @@ struct pid *alloc_pid(struct pid_namespace *ns) } spin_unlock_irq(&pidmap_lock); -out: return pid; out_unlock: @@ -351,8 +353,7 @@ out_free: free_pidmap(pid->numbers + i); kmem_cache_free(ns->pid_cachep, pid); - pid = NULL; - goto out; + return ERR_PTR(retval); } void disable_pid_allocation(struct pid_namespace *ns) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 879edfc5ee52..c099b082cd02 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2017,24 +2017,6 @@ int add_preferred_console(char *name, int idx, char *options) return __add_preferred_console(name, idx, options, NULL); } -int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options) -{ - struct console_cmdline *c; - int i; - - for (i = 0, c = console_cmdline; - i < MAX_CMDLINECONSOLES && c->name[0]; - i++, c++) - if (strcmp(c->name, name) == 0 && c->index == idx) { - strlcpy(c->name, name_new, sizeof(c->name)); - c->options = options; - c->index = idx_new; - return i; - } - /* not found */ - return -1; -} - bool console_suspend_enabled = true; EXPORT_SYMBOL(console_suspend_enabled); @@ -2436,9 +2418,6 @@ void register_console(struct console *newcon) if (preferred_console < 0 || bcon || !console_drivers) preferred_console = selected_console; - if (newcon->early_setup) - newcon->early_setup(); - /* * See if we want to use this console driver. If we * didn't select a console we take the first one @@ -2464,23 +2443,27 @@ void register_console(struct console *newcon) for (i = 0, c = console_cmdline; i < MAX_CMDLINECONSOLES && c->name[0]; i++, c++) { - BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name)); - if (strcmp(c->name, newcon->name) != 0) - continue; - if (newcon->index >= 0 && - newcon->index != c->index) - continue; - if (newcon->index < 0) - newcon->index = c->index; + if (!newcon->match || + newcon->match(newcon, c->name, c->index, c->options) != 0) { + /* default matching */ + BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name)); + if (strcmp(c->name, newcon->name) != 0) + continue; + if (newcon->index >= 0 && + newcon->index != c->index) + continue; + if (newcon->index < 0) + newcon->index = c->index; - if (_braille_register_console(newcon, c)) - return; + if (_braille_register_console(newcon, c)) + return; + + if (newcon->setup && + newcon->setup(newcon, c->options) != 0) + break; + } - if (newcon->setup && - newcon->setup(newcon, console_cmdline[i].options) != 0) - break; newcon->flags |= CON_ENABLED; - newcon->index = c->index; if (i == selected_console) { newcon->flags |= CON_CONSDEV; preferred_console = selected_console; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 227fec36b12a..c8e0e050a36a 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -456,8 +456,6 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) static int ptrace_detach(struct task_struct *child, unsigned int data) { - bool dead = false; - if (!valid_signal(data)) return -EIO; @@ -467,18 +465,19 @@ static int ptrace_detach(struct task_struct *child, unsigned int data) write_lock_irq(&tasklist_lock); /* - * This child can be already killed. Make sure de_thread() or - * our sub-thread doing do_wait() didn't do release_task() yet. + * We rely on ptrace_freeze_traced(). It can't be killed and + * untraced by another thread, it can't be a zombie. */ - if (child->ptrace) { - child->exit_code = data; - dead = __ptrace_detach(current, child); - } + WARN_ON(!child->ptrace || child->exit_state); + /* + * tasklist_lock avoids the race with wait_task_stopped(), see + * the comment in ptrace_resume(). + */ + child->exit_code = data; + __ptrace_detach(current, child); write_unlock_irq(&tasklist_lock); proc_ptrace_connector(child, PTRACE_DETACH); - if (unlikely(dead)) - release_task(child); return 0; } @@ -697,6 +696,8 @@ static int ptrace_peek_siginfo(struct task_struct *child, static int ptrace_resume(struct task_struct *child, long request, unsigned long data) { + bool need_siglock; + if (!valid_signal(data)) return -EIO; @@ -724,8 +725,26 @@ static int ptrace_resume(struct task_struct *child, long request, user_disable_single_step(child); } + /* + * Change ->exit_code and ->state under siglock to avoid the race + * with wait_task_stopped() in between; a non-zero ->exit_code will + * wrongly look like another report from tracee. + * + * Note that we need siglock even if ->exit_code == data and/or this + * status was not reported yet, the new status must not be cleared by + * wait_task_stopped() after resume. + * + * If data == 0 we do not care if wait_task_stopped() reports the old + * status and clears the code too; this can't race with the tracee, it + * takes siglock after resume. + */ + need_siglock = data && !thread_group_empty(current); + if (need_siglock) + spin_lock_irq(&child->sighand->siglock); child->exit_code = data; wake_up_state(child, __TASK_TRACED); + if (need_siglock) + spin_unlock_irq(&child->sighand->siglock); return 0; } diff --git a/kernel/signal.c b/kernel/signal.c index a390499943e4..d51c5ddd855c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2992,11 +2992,9 @@ static int do_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t *info) * Nor can they impersonate a kill()/tgkill(), which adds source info. */ if ((info->si_code >= 0 || info->si_code == SI_TKILL) && - (task_pid_vnr(current) != pid)) { - /* We used to allow any < 0 si_code */ - WARN_ON_ONCE(info->si_code < 0); + (task_pid_vnr(current) != pid)) return -EPERM; - } + info->si_signo = sig; /* POSIX.1b doesn't mention process groups. */ @@ -3041,12 +3039,10 @@ static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) /* Not even root can pretend to send signals from the kernel. * Nor can they impersonate a kill()/tgkill(), which adds source info. */ - if (((info->si_code >= 0 || info->si_code == SI_TKILL)) && - (task_pid_vnr(current) != pid)) { - /* We used to allow any < 0 si_code */ - WARN_ON_ONCE(info->si_code < 0); + if ((info->si_code >= 0 || info->si_code == SI_TKILL) && + (task_pid_vnr(current) != pid)) return -EPERM; - } + info->si_signo = sig; return do_send_specific(tgid, pid, sig, info); diff --git a/kernel/smp.c b/kernel/smp.c index f38a1e692259..07854477c164 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -19,7 +19,7 @@ enum { CSD_FLAG_LOCK = 0x01, - CSD_FLAG_WAIT = 0x02, + CSD_FLAG_SYNCHRONOUS = 0x02, }; struct call_function_data { @@ -107,7 +107,7 @@ void __init call_function_init(void) */ static void csd_lock_wait(struct call_single_data *csd) { - while (csd->flags & CSD_FLAG_LOCK) + while (smp_load_acquire(&csd->flags) & CSD_FLAG_LOCK) cpu_relax(); } @@ -121,19 +121,17 @@ static void csd_lock(struct call_single_data *csd) * to ->flags with any subsequent assignments to other * fields of the specified call_single_data structure: */ - smp_mb(); + smp_wmb(); } static void csd_unlock(struct call_single_data *csd) { - WARN_ON((csd->flags & CSD_FLAG_WAIT) && !(csd->flags & CSD_FLAG_LOCK)); + WARN_ON(!(csd->flags & CSD_FLAG_LOCK)); /* * ensure we're all done before releasing data: */ - smp_mb(); - - csd->flags &= ~CSD_FLAG_LOCK; + smp_store_release(&csd->flags, 0); } static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); @@ -144,13 +142,16 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); * ->func, ->info, and ->flags set. */ static int generic_exec_single(int cpu, struct call_single_data *csd, - smp_call_func_t func, void *info, int wait) + smp_call_func_t func, void *info) { - struct call_single_data csd_stack = { .flags = 0 }; - unsigned long flags; - - if (cpu == smp_processor_id()) { + unsigned long flags; + + /* + * We can unlock early even for the synchronous on-stack case, + * since we're doing this from the same CPU.. + */ + csd_unlock(csd); local_irq_save(flags); func(info); local_irq_restore(flags); @@ -158,24 +159,14 @@ static int generic_exec_single(int cpu, struct call_single_data *csd, } - if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) + if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) { + csd_unlock(csd); return -ENXIO; - - - if (!csd) { - csd = &csd_stack; - if (!wait) - csd = this_cpu_ptr(&csd_data); } - csd_lock(csd); - csd->func = func; csd->info = info; - if (wait) - csd->flags |= CSD_FLAG_WAIT; - /* * The list addition should be visible before sending the IPI * handler locks the list to pull the entry off it because of @@ -190,9 +181,6 @@ static int generic_exec_single(int cpu, struct call_single_data *csd, if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) arch_send_call_function_single_ipi(cpu); - if (wait) - csd_lock_wait(csd); - return 0; } @@ -250,8 +238,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) } llist_for_each_entry_safe(csd, csd_next, entry, llist) { - csd->func(csd->info); - csd_unlock(csd); + smp_call_func_t func = csd->func; + void *info = csd->info; + + /* Do we wait until *after* callback? */ + if (csd->flags & CSD_FLAG_SYNCHRONOUS) { + func(info); + csd_unlock(csd); + } else { + csd_unlock(csd); + func(info); + } } /* @@ -274,6 +271,8 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) int smp_call_function_single(int cpu, smp_call_func_t func, void *info, int wait) { + struct call_single_data *csd; + struct call_single_data csd_stack = { .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS }; int this_cpu; int err; @@ -292,7 +291,16 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() && !oops_in_progress); - err = generic_exec_single(cpu, NULL, func, info, wait); + csd = &csd_stack; + if (!wait) { + csd = this_cpu_ptr(&csd_data); + csd_lock(csd); + } + + err = generic_exec_single(cpu, csd, func, info); + + if (wait) + csd_lock_wait(csd); put_cpu(); @@ -321,7 +329,15 @@ int smp_call_function_single_async(int cpu, struct call_single_data *csd) int err = 0; preempt_disable(); - err = generic_exec_single(cpu, csd, csd->func, csd->info, 0); + + /* We could deadlock if we have to wait here with interrupts disabled! */ + if (WARN_ON_ONCE(csd->flags & CSD_FLAG_LOCK)) + csd_lock_wait(csd); + + csd->flags = CSD_FLAG_LOCK; + smp_wmb(); + + err = generic_exec_single(cpu, csd, csd->func, csd->info); preempt_enable(); return err; @@ -433,6 +449,8 @@ void smp_call_function_many(const struct cpumask *mask, struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu); csd_lock(csd); + if (wait) + csd->flags |= CSD_FLAG_SYNCHRONOUS; csd->func = func; csd->info = info; llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)); diff --git a/kernel/sys.c b/kernel/sys.c index 3be344902316..a4e372b798a5 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1649,14 +1649,13 @@ SYSCALL_DEFINE1(umask, int, mask) return mask; } -static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd) +static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) { struct fd exe; + struct file *old_exe, *exe_file; struct inode *inode; int err; - VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm); - exe = fdget(fd); if (!exe.file) return -EBADF; @@ -1680,15 +1679,22 @@ static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd) /* * Forbid mm->exe_file change if old file still mapped. */ + exe_file = get_mm_exe_file(mm); err = -EBUSY; - if (mm->exe_file) { + if (exe_file) { struct vm_area_struct *vma; - for (vma = mm->mmap; vma; vma = vma->vm_next) - if (vma->vm_file && - path_equal(&vma->vm_file->f_path, - &mm->exe_file->f_path)) - goto exit; + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (!vma->vm_file) + continue; + if (path_equal(&vma->vm_file->f_path, + &exe_file->f_path)) + goto exit_err; + } + + up_read(&mm->mmap_sem); + fput(exe_file); } /* @@ -1702,10 +1708,18 @@ static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd) goto exit; err = 0; - set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */ + /* set the new file, lockless */ + get_file(exe.file); + old_exe = xchg(&mm->exe_file, exe.file); + if (old_exe) + fput(old_exe); exit: fdput(exe); return err; +exit_err: + up_read(&mm->mmap_sem); + fput(exe_file); + goto exit; } #ifdef CONFIG_CHECKPOINT_RESTORE @@ -1840,10 +1854,9 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL; } - down_write(&mm->mmap_sem); if (prctl_map.exe_fd != (u32)-1) - error = prctl_set_mm_exe_file_locked(mm, prctl_map.exe_fd); - downgrade_write(&mm->mmap_sem); + error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd); + down_read(&mm->mmap_sem); if (error) goto out; @@ -1909,12 +1922,8 @@ static int prctl_set_mm(int opt, unsigned long addr, if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - if (opt == PR_SET_MM_EXE_FILE) { - down_write(&mm->mmap_sem); - error = prctl_set_mm_exe_file_locked(mm, (unsigned int)addr); - up_write(&mm->mmap_sem); - return error; - } + if (opt == PR_SET_MM_EXE_FILE) + return prctl_set_mm_exe_file(mm, (unsigned int)addr); if (addr >= TASK_SIZE || addr < mmap_min_addr) return -EINVAL; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 42b7fc2860c1..2082b1a88fb9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -93,11 +93,9 @@ #include <linux/nmi.h> #endif - #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ -extern int max_threads; extern int suid_dumpable; #ifdef CONFIG_COREDUMP extern int core_uses_pid; @@ -710,10 +708,10 @@ static struct ctl_table kern_table[] = { #endif { .procname = "threads-max", - .data = &max_threads, + .data = NULL, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = sysctl_max_threads, }, { .procname = "random", @@ -1983,7 +1981,15 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, int write, void *data) { if (write) { - *valp = *negp ? -*lvalp : *lvalp; + if (*negp) { + if (*lvalp > (unsigned long) INT_MAX + 1) + return -EINVAL; + *valp = -*lvalp; + } else { + if (*lvalp > (unsigned long) INT_MAX) + return -EINVAL; + *valp = *lvalp; + } } else { int val = *valp; if (val < 0) { diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 25d942d1da27..11dc22a6983b 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -440,7 +440,7 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) mutex_unlock(&clockevents_mutex); return ret; } -EXPORT_SYMBOL_GPL(clockevents_unbind); +EXPORT_SYMBOL_GPL(clockevents_unbind_device); /* Sanity check of state transition callbacks */ static int clockevents_sanity_check(struct clock_event_device *dev) |