summaryrefslogtreecommitdiff
path: root/fs/exec.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/exec.c')
-rw-r--r--fs/exec.c866
1 files changed, 127 insertions, 739 deletions
diff --git a/fs/exec.c b/fs/exec.c
index da27b91ff1e8..9c73def87642 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -59,26 +59,15 @@
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/tlb.h>
-#include <asm/exec.h>
#include <trace/events/task.h>
#include "internal.h"
+#include "coredump.h"
#include <trace/events/sched.h>
-int core_uses_pid;
-char core_pattern[CORENAME_MAX_SIZE] = "core";
-unsigned int core_pipe_limit;
int suid_dumpable = 0;
-struct core_name {
- char *corename;
- int used, size;
-};
-static atomic_t call_count = ATOMIC_INIT(1);
-
-/* The maximal length of core_pattern is also specified in sysctl.c */
-
static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);
@@ -116,25 +105,26 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
SYSCALL_DEFINE1(uselib, const char __user *, library)
{
struct file *file;
- char *tmp = getname(library);
+ struct filename *tmp = getname(library);
int error = PTR_ERR(tmp);
static const struct open_flags uselib_flags = {
.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
.acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
- .intent = LOOKUP_OPEN
+ .intent = LOOKUP_OPEN,
+ .lookup_flags = LOOKUP_FOLLOW,
};
if (IS_ERR(tmp))
goto out;
- file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW);
+ file = do_filp_open(AT_FDCWD, tmp, &uselib_flags);
putname(tmp);
error = PTR_ERR(file);
if (IS_ERR(file))
goto out;
error = -EINVAL;
- if (!S_ISREG(file->f_path.dentry->d_inode->i_mode))
+ if (!S_ISREG(file_inode(file)->i_mode))
goto exit;
error = -EACCES;
@@ -366,7 +356,7 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
* flags, permissions, and offset, so we use temporary values. We'll update
* them later in setup_arg_pages().
*/
-int bprm_mm_init(struct linux_binprm *bprm)
+static int bprm_mm_init(struct linux_binprm *bprm)
{
int err;
struct mm_struct *mm = NULL;
@@ -402,7 +392,7 @@ struct user_arg_ptr {
union {
const char __user *const __user *native;
#ifdef CONFIG_COMPAT
- compat_uptr_t __user *compat;
+ const compat_uptr_t __user *compat;
#endif
} ptr;
};
@@ -445,8 +435,9 @@ static int count(struct user_arg_ptr argv, int max)
if (IS_ERR(p))
return -EFAULT;
- if (i++ >= max)
+ if (i >= max)
return -E2BIG;
+ ++i;
if (fatal_signal_pending(current))
return -ERESTARTNOHAND;
@@ -613,7 +604,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* process cleanup to remove whatever mess we made.
*/
if (length != move_page_tables(vma, old_start,
- vma, new_start, length))
+ vma, new_start, length, false))
return -ENOMEM;
lru_add_drain();
@@ -623,7 +614,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* when the old and new regions overlap clear from new_end.
*/
free_pgd_range(&tlb, new_end, old_end, new_end,
- vma->vm_next ? vma->vm_next->vm_start : 0);
+ vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
} else {
/*
* otherwise, clean from old_start; this is done to not touch
@@ -632,7 +623,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* for the others its just a little faster.
*/
free_pgd_range(&tlb, old_start, old_end, new_end,
- vma->vm_next ? vma->vm_next->vm_start : 0);
+ vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
}
tlb_finish_mmu(&tlb, new_end, old_end);
@@ -762,18 +753,20 @@ struct file *open_exec(const char *name)
{
struct file *file;
int err;
+ struct filename tmp = { .name = name };
static const struct open_flags open_exec_flags = {
.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
.acc_mode = MAY_EXEC | MAY_OPEN,
- .intent = LOOKUP_OPEN
+ .intent = LOOKUP_OPEN,
+ .lookup_flags = LOOKUP_FOLLOW,
};
- file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW);
+ file = do_filp_open(AT_FDCWD, &tmp, &open_exec_flags);
if (IS_ERR(file))
goto out;
err = -EACCES;
- if (!S_ISREG(file->f_path.dentry->d_inode->i_mode))
+ if (!S_ISREG(file_inode(file)->i_mode))
goto exit;
if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
@@ -811,6 +804,15 @@ int kernel_read(struct file *file, loff_t offset,
EXPORT_SYMBOL(kernel_read);
+ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
+{
+ ssize_t res = file->f_op->read(file, (void __user *)addr, len, &pos);
+ if (res > 0)
+ flush_icache_range(addr, addr + len);
+ return res;
+}
+EXPORT_SYMBOL(read_code);
+
static int exec_mmap(struct mm_struct *mm)
{
struct task_struct *tsk;
@@ -888,9 +890,11 @@ static int de_thread(struct task_struct *tsk)
sig->notify_count--;
while (sig->notify_count) {
- __set_current_state(TASK_UNINTERRUPTIBLE);
+ __set_current_state(TASK_KILLABLE);
spin_unlock_irq(lock);
schedule();
+ if (unlikely(__fatal_signal_pending(tsk)))
+ goto killed;
spin_lock_irq(lock);
}
spin_unlock_irq(lock);
@@ -905,12 +909,16 @@ static int de_thread(struct task_struct *tsk)
sig->notify_count = -1; /* for exit_notify() */
for (;;) {
+ threadgroup_change_begin(tsk);
write_lock_irq(&tasklist_lock);
if (likely(leader->exit_state))
break;
- __set_current_state(TASK_UNINTERRUPTIBLE);
+ __set_current_state(TASK_KILLABLE);
write_unlock_irq(&tasklist_lock);
+ threadgroup_change_end(tsk);
schedule();
+ if (unlikely(__fatal_signal_pending(tsk)))
+ goto killed;
}
/*
@@ -924,6 +932,7 @@ static int de_thread(struct task_struct *tsk)
* also take its birthdate (always earlier than our own).
*/
tsk->start_time = leader->start_time;
+ tsk->real_start_time = leader->real_start_time;
BUG_ON(!same_thread_group(leader, tsk));
BUG_ON(has_group_leader_pid(tsk));
@@ -939,9 +948,8 @@ static int de_thread(struct task_struct *tsk)
* Note: The old leader also uses this pid until release_task
* is called. Odd but simple and correct.
*/
- detach_pid(tsk, PIDTYPE_PID);
tsk->pid = leader->pid;
- attach_pid(tsk, PIDTYPE_PID, task_pid(leader));
+ change_pid(tsk, PIDTYPE_PID, task_pid(leader));
transfer_pid(leader, tsk, PIDTYPE_PGID);
transfer_pid(leader, tsk, PIDTYPE_SID);
@@ -965,6 +973,7 @@ static int de_thread(struct task_struct *tsk)
if (unlikely(leader->ptrace))
__wake_up_parent(leader, leader->parent);
write_unlock_irq(&tasklist_lock);
+ threadgroup_change_end(tsk);
release_task(leader);
}
@@ -1004,40 +1013,14 @@ no_thread_group:
BUG_ON(!thread_group_leader(tsk));
return 0;
-}
-
-/*
- * These functions flushes out all traces of the currently running executable
- * so that a new one can be started
- */
-static void flush_old_files(struct files_struct * files)
-{
- long j = -1;
- struct fdtable *fdt;
-
- spin_lock(&files->file_lock);
- for (;;) {
- unsigned long set, i;
- j++;
- i = j * __NFDBITS;
- fdt = files_fdtable(files);
- if (i >= fdt->max_fds)
- break;
- set = fdt->close_on_exec[j];
- if (!set)
- continue;
- fdt->close_on_exec[j] = 0;
- spin_unlock(&files->file_lock);
- for ( ; set ; i++,set >>= 1) {
- if (set & 1) {
- sys_close(i);
- }
- }
- spin_lock(&files->file_lock);
-
- }
- spin_unlock(&files->file_lock);
+killed:
+ /* protects against exit_notify() and __exit_signal() */
+ read_lock(&tasklist_lock);
+ sig->group_exit_task = NULL;
+ sig->notify_count = 0;
+ read_unlock(&tasklist_lock);
+ return -EAGAIN;
}
char *get_task_comm(char *buf, struct task_struct *tsk)
@@ -1050,20 +1033,15 @@ char *get_task_comm(char *buf, struct task_struct *tsk)
}
EXPORT_SYMBOL_GPL(get_task_comm);
+/*
+ * These functions flushes out all traces of the currently running executable
+ * so that a new one can be started
+ */
+
void set_task_comm(struct task_struct *tsk, char *buf)
{
task_lock(tsk);
-
trace_task_rename(tsk, buf);
-
- /*
- * Threads may access current->comm without holding
- * the task lock, so write the string carefully.
- * Readers without a lock may see incomplete new
- * names but are safe from non-terminating string reads.
- */
- memset(tsk->comm, 0, TASK_COMM_LEN);
- wmb();
strlcpy(tsk->comm, buf, sizeof(tsk->comm));
task_unlock(tsk);
perf_event_comm(tsk);
@@ -1110,7 +1088,8 @@ int flush_old_exec(struct linux_binprm * bprm)
bprm->mm = NULL; /* We're using it now */
set_fs(USER_DS);
- current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD);
+ current->flags &=
+ ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | PF_NOFREEZE);
flush_thread();
current->personality &= ~bprm->per_clear;
@@ -1123,7 +1102,7 @@ EXPORT_SYMBOL(flush_old_exec);
void would_dump(struct linux_binprm *bprm, struct file *file)
{
- if (inode_permission(file->f_path.dentry->d_inode, MAY_READ) < 0)
+ if (inode_permission(file_inode(file), MAY_READ) < 0)
bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
}
EXPORT_SYMBOL(would_dump);
@@ -1136,7 +1115,7 @@ void setup_new_exec(struct linux_binprm * bprm)
current->sas_ss_sp = current->sas_ss_size = 0;
if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid()))
- set_dumpable(current->mm, 1);
+ set_dumpable(current->mm, SUID_DUMP_USER);
else
set_dumpable(current->mm, suid_dumpable);
@@ -1158,20 +1137,13 @@ void setup_new_exec(struct linux_binprm * bprm)
set_dumpable(current->mm, suid_dumpable);
}
- /*
- * Flush performance counters when crossing a
- * security domain:
- */
- if (!get_dumpable(current->mm))
- perf_event_exit_task(current);
-
/* An exec changes our domain. We are no longer part of the thread
group */
current->self_exec_id++;
flush_signal_handlers(current, 0);
- flush_old_files(current->files);
+ do_close_on_exec(current->files);
}
EXPORT_SYMBOL(setup_new_exec);
@@ -1201,9 +1173,24 @@ void free_bprm(struct linux_binprm *bprm)
mutex_unlock(&current->signal->cred_guard_mutex);
abort_creds(bprm->cred);
}
+ /* If a binfmt changed the interp, free it. */
+ if (bprm->interp != bprm->filename)
+ kfree(bprm->interp);
kfree(bprm);
}
+int bprm_change_interp(char *interp, struct linux_binprm *bprm)
+{
+ /* If a binfmt changed the interp, free it first. */
+ if (bprm->interp != bprm->filename)
+ kfree(bprm->interp);
+ bprm->interp = kstrdup(interp, GFP_KERNEL);
+ if (!bprm->interp)
+ return -ENOMEM;
+ return 0;
+}
+EXPORT_SYMBOL(bprm_change_interp);
+
/*
* install the new credentials for this executable
*/
@@ -1213,6 +1200,15 @@ void install_exec_creds(struct linux_binprm *bprm)
commit_creds(bprm->cred);
bprm->cred = NULL;
+
+ /*
+ * Disable monitoring for regular users
+ * when executing setuid binaries. Must
+ * wait until new credentials are committed
+ * by commit_creds() above
+ */
+ if (get_dumpable(current->mm) != SUID_DUMP_USER)
+ perf_event_exit_task(current);
/*
* cred_guard_mutex must be held at least to this point to prevent
* ptrace_attach() from altering our determination of the task's
@@ -1280,7 +1276,7 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
int prepare_binprm(struct linux_binprm *bprm)
{
umode_t mode;
- struct inode * inode = bprm->file->f_path.dentry->d_inode;
+ struct inode * inode = file_inode(bprm->file);
int retval;
mode = inode->i_mode;
@@ -1292,14 +1288,13 @@ int prepare_binprm(struct linux_binprm *bprm)
bprm->cred->egid = current_egid();
if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
- !current->no_new_privs) {
+ !current->no_new_privs &&
+ kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
+ kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
/* Set-uid? */
if (mode & S_ISUID) {
- if (!kuid_has_mapping(bprm->cred->user_ns, inode->i_uid))
- return -EPERM;
bprm->per_clear |= PER_CLEAR_ON_SETID;
bprm->cred->euid = inode->i_uid;
-
}
/* Set-gid? */
@@ -1309,8 +1304,6 @@ int prepare_binprm(struct linux_binprm *bprm)
* executable.
*/
if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
- if (!kgid_has_mapping(bprm->cred->user_ns, inode->i_gid))
- return -EPERM;
bprm->per_clear |= PER_CLEAR_ON_SETID;
bprm->cred->egid = inode->i_gid;
}
@@ -1375,13 +1368,17 @@ EXPORT_SYMBOL(remove_arg_zero);
/*
* cycle the list of binary formats handler, until one recognizes the image
*/
-int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
+int search_binary_handler(struct linux_binprm *bprm)
{
unsigned int depth = bprm->recursion_depth;
int try,retval;
struct linux_binfmt *fmt;
pid_t old_pid, old_vpid;
+ /* This allows 4 levels of binfmt rewrites before failing hard. */
+ if (depth > 5)
+ return -ELOOP;
+
retval = security_bprm_check(bprm);
if (retval)
return retval;
@@ -1400,18 +1397,14 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
for (try=0; try<2; try++) {
read_lock(&binfmt_lock);
list_for_each_entry(fmt, &formats, lh) {
- int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
+ int (*fn)(struct linux_binprm *) = fmt->load_binary;
if (!fn)
continue;
if (!try_module_get(fmt->module))
continue;
read_unlock(&binfmt_lock);
- retval = fn(bprm, regs);
- /*
- * Restore the depth counter to its starting value
- * in this call, so we don't have to rely on every
- * load_binary function to restore it on return.
- */
+ bprm->recursion_depth = depth + 1;
+ retval = fn(bprm);
bprm->recursion_depth = depth;
if (retval >= 0) {
if (depth == 0) {
@@ -1465,15 +1458,13 @@ EXPORT_SYMBOL(search_binary_handler);
*/
static int do_execve_common(const char *filename,
struct user_arg_ptr argv,
- struct user_arg_ptr envp,
- struct pt_regs *regs)
+ struct user_arg_ptr envp)
{
struct linux_binprm *bprm;
struct file *file;
struct files_struct *displaced;
bool clear_in_exec;
int retval;
- const struct cred *cred = current_cred();
/*
* We move the actual failure in case of RLIMIT_NPROC excess from
@@ -1482,7 +1473,7 @@ static int do_execve_common(const char *filename,
* whether NPROC limit is still exceeded.
*/
if ((current->flags & PF_NPROC_EXCEEDED) &&
- atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) {
+ atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
retval = -EAGAIN;
goto out_ret;
}
@@ -1550,7 +1541,7 @@ static int do_execve_common(const char *filename,
if (retval < 0)
goto out;
- retval = search_binary_handler(bprm,regs);
+ retval = search_binary_handler(bprm);
if (retval < 0)
goto out;
@@ -1592,19 +1583,17 @@ out_ret:
int do_execve(const char *filename,
const char __user *const __user *__argv,
- const char __user *const __user *__envp,
- struct pt_regs *regs)
+ const char __user *const __user *__envp)
{
struct user_arg_ptr argv = { .ptr.native = __argv };
struct user_arg_ptr envp = { .ptr.native = __envp };
- return do_execve_common(filename, argv, envp, regs);
+ return do_execve_common(filename, argv, envp);
}
#ifdef CONFIG_COMPAT
-int compat_do_execve(char *filename,
- compat_uptr_t __user *__argv,
- compat_uptr_t __user *__envp,
- struct pt_regs *regs)
+static int compat_do_execve(const char *filename,
+ const compat_uptr_t __user *__argv,
+ const compat_uptr_t __user *__envp)
{
struct user_arg_ptr argv = {
.is_compat = true,
@@ -1614,7 +1603,7 @@ int compat_do_execve(char *filename,
.is_compat = true,
.ptr.compat = __envp,
};
- return do_execve_common(filename, argv, envp, regs);
+ return do_execve_common(filename, argv, envp);
}
#endif
@@ -1632,353 +1621,6 @@ void set_binfmt(struct linux_binfmt *new)
EXPORT_SYMBOL(set_binfmt);
-static int expand_corename(struct core_name *cn)
-{
- char *old_corename = cn->corename;
-
- cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
- cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
-
- if (!cn->corename) {
- kfree(old_corename);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static int cn_printf(struct core_name *cn, const char *fmt, ...)
-{
- char *cur;
- int need;
- int ret;
- va_list arg;
-
- va_start(arg, fmt);
- need = vsnprintf(NULL, 0, fmt, arg);
- va_end(arg);
-
- if (likely(need < cn->size - cn->used - 1))
- goto out_printf;
-
- ret = expand_corename(cn);
- if (ret)
- goto expand_fail;
-
-out_printf:
- cur = cn->corename + cn->used;
- va_start(arg, fmt);
- vsnprintf(cur, need + 1, fmt, arg);
- va_end(arg);
- cn->used += need;
- return 0;
-
-expand_fail:
- return ret;
-}
-
-static void cn_escape(char *str)
-{
- for (; *str; str++)
- if (*str == '/')
- *str = '!';
-}
-
-static int cn_print_exe_file(struct core_name *cn)
-{
- struct file *exe_file;
- char *pathbuf, *path;
- int ret;
-
- exe_file = get_mm_exe_file(current->mm);
- if (!exe_file) {
- char *commstart = cn->corename + cn->used;
- ret = cn_printf(cn, "%s (path unknown)", current->comm);
- cn_escape(commstart);
- return ret;
- }
-
- pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
- if (!pathbuf) {
- ret = -ENOMEM;
- goto put_exe_file;
- }
-
- path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
- if (IS_ERR(path)) {
- ret = PTR_ERR(path);
- goto free_buf;
- }
-
- cn_escape(path);
-
- ret = cn_printf(cn, "%s", path);
-
-free_buf:
- kfree(pathbuf);
-put_exe_file:
- fput(exe_file);
- return ret;
-}
-
-/* format_corename will inspect the pattern parameter, and output a
- * name into corename, which must have space for at least
- * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
- */
-static int format_corename(struct core_name *cn, long signr)
-{
- const struct cred *cred = current_cred();
- const char *pat_ptr = core_pattern;
- int ispipe = (*pat_ptr == '|');
- int pid_in_pattern = 0;
- int err = 0;
-
- cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
- cn->corename = kmalloc(cn->size, GFP_KERNEL);
- cn->used = 0;
-
- if (!cn->corename)
- return -ENOMEM;
-
- /* Repeat as long as we have more pattern to process and more output
- space */
- while (*pat_ptr) {
- if (*pat_ptr != '%') {
- if (*pat_ptr == 0)
- goto out;
- err = cn_printf(cn, "%c", *pat_ptr++);
- } else {
- switch (*++pat_ptr) {
- /* single % at the end, drop that */
- case 0:
- goto out;
- /* Double percent, output one percent */
- case '%':
- err = cn_printf(cn, "%c", '%');
- break;
- /* pid */
- case 'p':
- pid_in_pattern = 1;
- err = cn_printf(cn, "%d",
- task_tgid_vnr(current));
- break;
- /* uid */
- case 'u':
- err = cn_printf(cn, "%d", cred->uid);
- break;
- /* gid */
- case 'g':
- err = cn_printf(cn, "%d", cred->gid);
- break;
- /* signal that caused the coredump */
- case 's':
- err = cn_printf(cn, "%ld", signr);
- break;
- /* UNIX time of coredump */
- case 't': {
- struct timeval tv;
- do_gettimeofday(&tv);
- err = cn_printf(cn, "%lu", tv.tv_sec);
- break;
- }
- /* hostname */
- case 'h': {
- char *namestart = cn->corename + cn->used;
- down_read(&uts_sem);
- err = cn_printf(cn, "%s",
- utsname()->nodename);
- up_read(&uts_sem);
- cn_escape(namestart);
- break;
- }
- /* executable */
- case 'e': {
- char *commstart = cn->corename + cn->used;
- err = cn_printf(cn, "%s", current->comm);
- cn_escape(commstart);
- break;
- }
- case 'E':
- err = cn_print_exe_file(cn);
- break;
- /* core limit size */
- case 'c':
- err = cn_printf(cn, "%lu",
- rlimit(RLIMIT_CORE));
- break;
- default:
- break;
- }
- ++pat_ptr;
- }
-
- if (err)
- return err;
- }
-
- /* Backward compatibility with core_uses_pid:
- *
- * If core_pattern does not include a %p (as is the default)
- * and core_uses_pid is set, then .%pid will be appended to
- * the filename. Do not do this for piped commands. */
- if (!ispipe && !pid_in_pattern && core_uses_pid) {
- err = cn_printf(cn, ".%d", task_tgid_vnr(current));
- if (err)
- return err;
- }
-out:
- return ispipe;
-}
-
-static int zap_process(struct task_struct *start, int exit_code)
-{
- struct task_struct *t;
- int nr = 0;
-
- start->signal->flags = SIGNAL_GROUP_EXIT;
- start->signal->group_exit_code = exit_code;
- start->signal->group_stop_count = 0;
-
- t = start;
- do {
- task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
- if (t != current && t->mm) {
- sigaddset(&t->pending.signal, SIGKILL);
- signal_wake_up(t, 1);
- nr++;
- }
- } while_each_thread(start, t);
-
- return nr;
-}
-
-static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
- struct core_state *core_state, int exit_code)
-{
- struct task_struct *g, *p;
- unsigned long flags;
- int nr = -EAGAIN;
-
- spin_lock_irq(&tsk->sighand->siglock);
- if (!signal_group_exit(tsk->signal)) {
- mm->core_state = core_state;
- nr = zap_process(tsk, exit_code);
- }
- spin_unlock_irq(&tsk->sighand->siglock);
- if (unlikely(nr < 0))
- return nr;
-
- if (atomic_read(&mm->mm_users) == nr + 1)
- goto done;
- /*
- * We should find and kill all tasks which use this mm, and we should
- * count them correctly into ->nr_threads. We don't take tasklist
- * lock, but this is safe wrt:
- *
- * fork:
- * None of sub-threads can fork after zap_process(leader). All
- * processes which were created before this point should be
- * visible to zap_threads() because copy_process() adds the new
- * process to the tail of init_task.tasks list, and lock/unlock
- * of ->siglock provides a memory barrier.
- *
- * do_exit:
- * The caller holds mm->mmap_sem. This means that the task which
- * uses this mm can't pass exit_mm(), so it can't exit or clear
- * its ->mm.
- *
- * de_thread:
- * It does list_replace_rcu(&leader->tasks, &current->tasks),
- * we must see either old or new leader, this does not matter.
- * However, it can change p->sighand, so lock_task_sighand(p)
- * must be used. Since p->mm != NULL and we hold ->mmap_sem
- * it can't fail.
- *
- * Note also that "g" can be the old leader with ->mm == NULL
- * and already unhashed and thus removed from ->thread_group.
- * This is OK, __unhash_process()->list_del_rcu() does not
- * clear the ->next pointer, we will find the new leader via
- * next_thread().
- */
- rcu_read_lock();
- for_each_process(g) {
- if (g == tsk->group_leader)
- continue;
- if (g->flags & PF_KTHREAD)
- continue;
- p = g;
- do {
- if (p->mm) {
- if (unlikely(p->mm == mm)) {
- lock_task_sighand(p, &flags);
- nr += zap_process(p, exit_code);
- unlock_task_sighand(p, &flags);
- }
- break;
- }
- } while_each_thread(g, p);
- }
- rcu_read_unlock();
-done:
- atomic_set(&core_state->nr_threads, nr);
- return nr;
-}
-
-static int coredump_wait(int exit_code, struct core_state *core_state)
-{
- struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->mm;
- int core_waiters = -EBUSY;
-
- init_completion(&core_state->startup);
- core_state->dumper.task = tsk;
- core_state->dumper.next = NULL;
-
- down_write(&mm->mmap_sem);
- if (!mm->core_state)
- core_waiters = zap_threads(tsk, mm, core_state, exit_code);
- up_write(&mm->mmap_sem);
-
- if (core_waiters > 0) {
- struct core_thread *ptr;
-
- wait_for_completion(&core_state->startup);
- /*
- * Wait for all the threads to become inactive, so that
- * all the thread context (extended register state, like
- * fpu etc) gets copied to the memory.
- */
- ptr = core_state->dumper.next;
- while (ptr != NULL) {
- wait_task_inactive(ptr->task, 0);
- ptr = ptr->next;
- }
- }
-
- return core_waiters;
-}
-
-static void coredump_finish(struct mm_struct *mm)
-{
- struct core_thread *curr, *next;
- struct task_struct *task;
-
- next = mm->core_state->dumper.next;
- while ((curr = next) != NULL) {
- next = curr->next;
- task = curr->task;
- /*
- * see exit_mm(), curr->task must not see
- * ->task == NULL before we read ->next.
- */
- smp_mb();
- curr->task = NULL;
- wake_up_process(task);
- }
-
- mm->core_state = NULL;
-}
-
/*
* set_dumpable converts traditional three-value dumpable to two flags and
* stores them into mm->flags. It modifies lower two bits of mm->flags, but
@@ -2002,17 +1644,17 @@ static void coredump_finish(struct mm_struct *mm)
void set_dumpable(struct mm_struct *mm, int value)
{
switch (value) {
- case 0:
+ case SUID_DUMP_DISABLE:
clear_bit(MMF_DUMPABLE, &mm->flags);
smp_wmb();
clear_bit(MMF_DUMP_SECURELY, &mm->flags);
break;
- case 1:
+ case SUID_DUMP_USER:
set_bit(MMF_DUMPABLE, &mm->flags);
smp_wmb();
clear_bit(MMF_DUMP_SECURELY, &mm->flags);
break;
- case 2:
+ case SUID_DUMP_ROOT:
set_bit(MMF_DUMP_SECURELY, &mm->flags);
smp_wmb();
set_bit(MMF_DUMPABLE, &mm->flags);
@@ -2020,12 +1662,12 @@ void set_dumpable(struct mm_struct *mm, int value)
}
}
-static int __get_dumpable(unsigned long mm_flags)
+int __get_dumpable(unsigned long mm_flags)
{
int ret;
ret = mm_flags & MMF_DUMPABLE_MASK;
- return (ret >= 2) ? 2 : ret;
+ return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret;
}
int get_dumpable(struct mm_struct *mm)
@@ -2033,284 +1675,30 @@ int get_dumpable(struct mm_struct *mm)
return __get_dumpable(mm->flags);
}
-static void wait_for_dump_helpers(struct file *file)
-{
- struct pipe_inode_info *pipe;
-
- pipe = file->f_path.dentry->d_inode->i_pipe;
-
- pipe_lock(pipe);
- pipe->readers++;
- pipe->writers--;
-
- while ((pipe->readers > 1) && (!signal_pending(current))) {
- wake_up_interruptible_sync(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- pipe_wait(pipe);
- }
-
- pipe->readers--;
- pipe->writers++;
- pipe_unlock(pipe);
-
-}
-
-
-/*
- * umh_pipe_setup
- * helper function to customize the process used
- * to collect the core in userspace. Specifically
- * it sets up a pipe and installs it as fd 0 (stdin)
- * for the process. Returns 0 on success, or
- * PTR_ERR on failure.
- * Note that it also sets the core limit to 1. This
- * is a special value that we use to trap recursive
- * core dumps
- */
-static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
-{
- struct file *rp, *wp;
- struct fdtable *fdt;
- struct coredump_params *cp = (struct coredump_params *)info->data;
- struct files_struct *cf = current->files;
-
- wp = create_write_pipe(0);
- if (IS_ERR(wp))
- return PTR_ERR(wp);
-
- rp = create_read_pipe(wp, 0);
- if (IS_ERR(rp)) {
- free_write_pipe(wp);
- return PTR_ERR(rp);
- }
-
- cp->file = wp;
-
- sys_close(0);
- fd_install(0, rp);
- spin_lock(&cf->file_lock);
- fdt = files_fdtable(cf);
- __set_open_fd(0, fdt);
- __clear_close_on_exec(0, fdt);
- spin_unlock(&cf->file_lock);
-
- /* and disallow core files too */
- current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
-
- return 0;
-}
-
-void do_coredump(long signr, int exit_code, struct pt_regs *regs)
+SYSCALL_DEFINE3(execve,
+ const char __user *, filename,
+ const char __user *const __user *, argv,
+ const char __user *const __user *, envp)
{
- struct core_state core_state;
- struct core_name cn;
- struct mm_struct *mm = current->mm;
- struct linux_binfmt * binfmt;
- const struct cred *old_cred;
- struct cred *cred;
- int retval = 0;
- int flag = 0;
- int ispipe;
- static atomic_t core_dump_count = ATOMIC_INIT(0);
- struct coredump_params cprm = {
- .signr = signr,
- .regs = regs,
- .limit = rlimit(RLIMIT_CORE),
- /*
- * We must use the same mm->flags while dumping core to avoid
- * inconsistency of bit flags, since this flag is not protected
- * by any locks.
- */
- .mm_flags = mm->flags,
- };
-
- audit_core_dumps(signr);
-
- binfmt = mm->binfmt;
- if (!binfmt || !binfmt->core_dump)
- goto fail;
- if (!__get_dumpable(cprm.mm_flags))
- goto fail;
-
- cred = prepare_creds();
- if (!cred)
- goto fail;
- /*
- * We cannot trust fsuid as being the "true" uid of the
- * process nor do we know its entire history. We only know it
- * was tainted so we dump it as root in mode 2.
- */
- if (__get_dumpable(cprm.mm_flags) == 2) {
- /* Setuid core dump mode */
- flag = O_EXCL; /* Stop rewrite attacks */
- cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
+ struct filename *path = getname(filename);
+ int error = PTR_ERR(path);
+ if (!IS_ERR(path)) {
+ error = do_execve(path->name, argv, envp);
+ putname(path);
}
-
- retval = coredump_wait(exit_code, &core_state);
- if (retval < 0)
- goto fail_creds;
-
- old_cred = override_creds(cred);
-
- /*
- * Clear any false indication of pending signals that might
- * be seen by the filesystem code called to write the core file.
- */
- clear_thread_flag(TIF_SIGPENDING);
-
- ispipe = format_corename(&cn, signr);
-
- if (ispipe) {
- int dump_count;
- char **helper_argv;
-
- if (ispipe < 0) {
- printk(KERN_WARNING "format_corename failed\n");
- printk(KERN_WARNING "Aborting core\n");
- goto fail_corename;
- }
-
- if (cprm.limit == 1) {
- /*
- * Normally core limits are irrelevant to pipes, since
- * we're not writing to the file system, but we use
- * cprm.limit of 1 here as a speacial value. Any
- * non-1 limit gets set to RLIM_INFINITY below, but
- * a limit of 0 skips the dump. This is a consistent
- * way to catch recursive crashes. We can still crash
- * if the core_pattern binary sets RLIM_CORE = !1
- * but it runs as root, and can do lots of stupid things
- * Note that we use task_tgid_vnr here to grab the pid
- * of the process group leader. That way we get the
- * right pid if a thread in a multi-threaded
- * core_pattern process dies.
- */
- printk(KERN_WARNING
- "Process %d(%s) has RLIMIT_CORE set to 1\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Aborting core\n");
- goto fail_unlock;
- }
- cprm.limit = RLIM_INFINITY;
-
- dump_count = atomic_inc_return(&core_dump_count);
- if (core_pipe_limit && (core_pipe_limit < dump_count)) {
- printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Skipping core dump\n");
- goto fail_dropcount;
- }
-
- helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
- if (!helper_argv) {
- printk(KERN_WARNING "%s failed to allocate memory\n",
- __func__);
- goto fail_dropcount;
- }
-
- retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
- NULL, UMH_WAIT_EXEC, umh_pipe_setup,
- NULL, &cprm);
- argv_free(helper_argv);
- if (retval) {
- printk(KERN_INFO "Core dump to %s pipe failed\n",
- cn.corename);
- goto close_fail;
- }
- } else {
- struct inode *inode;
-
- if (cprm.limit < binfmt->min_coredump)
- goto fail_unlock;
-
- cprm.file = filp_open(cn.corename,
- O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
- 0600);
- if (IS_ERR(cprm.file))
- goto fail_unlock;
-
- inode = cprm.file->f_path.dentry->d_inode;
- if (inode->i_nlink > 1)
- goto close_fail;
- if (d_unhashed(cprm.file->f_path.dentry))
- goto close_fail;
- /*
- * AK: actually i see no reason to not allow this for named
- * pipes etc, but keep the previous behaviour for now.
- */
- if (!S_ISREG(inode->i_mode))
- goto close_fail;
- /*
- * Dont allow local users get cute and trick others to coredump
- * into their pre-created files.
- */
- if (!uid_eq(inode->i_uid, current_fsuid()))
- goto close_fail;
- if (!cprm.file->f_op || !cprm.file->f_op->write)
- goto close_fail;
- if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
- goto close_fail;
- }
-
- retval = binfmt->core_dump(&cprm);
- if (retval)
- current->signal->group_exit_code |= 0x80;
-
- if (ispipe && core_pipe_limit)
- wait_for_dump_helpers(cprm.file);
-close_fail:
- if (cprm.file)
- filp_close(cprm.file, NULL);
-fail_dropcount:
- if (ispipe)
- atomic_dec(&core_dump_count);
-fail_unlock:
- kfree(cn.corename);
-fail_corename:
- coredump_finish(mm);
- revert_creds(old_cred);
-fail_creds:
- put_cred(cred);
-fail:
- return;
-}
-
-/*
- * Core dumping helper functions. These are the only things you should
- * do on a core-file: use only these functions to write out all the
- * necessary info.
- */
-int dump_write(struct file *file, const void *addr, int nr)
-{
- return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+ return error;
}
-EXPORT_SYMBOL(dump_write);
-
-int dump_seek(struct file *file, loff_t off)
-{
- int ret = 1;
-
- if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
- if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
- return 0;
- } else {
- char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-
- if (!buf)
- return 0;
- while (off > 0) {
- unsigned long n = off;
-
- if (n > PAGE_SIZE)
- n = PAGE_SIZE;
- if (!dump_write(file, buf, n)) {
- ret = 0;
- break;
- }
- off -= n;
- }
- free_page((unsigned long)buf);
+#ifdef CONFIG_COMPAT
+asmlinkage long compat_sys_execve(const char __user * filename,
+ const compat_uptr_t __user * argv,
+ const compat_uptr_t __user * envp)
+{
+ struct filename *path = getname(filename);
+ int error = PTR_ERR(path);
+ if (!IS_ERR(path)) {
+ error = compat_do_execve(path->name, argv, envp);
+ putname(path);
}
- return ret;
+ return error;
}
-EXPORT_SYMBOL(dump_seek);
+#endif